Genome wide analyses

Load packages and data

Note that the MASS package is required to run the CPASSOC. Unfortunately this clashes with the dplyr select(). So be prepared to run detach("package:MASS", unload=TRUE) or use dplyr::select() to get some things to work if you’re moving back and forth throughout the code.

Show the code
library(tidyverse) # tidy coding, ggplot etc
library(glue) # for coding within strings
library(bigsnpr) # to install: devtools::install_github("privefl/bigsnpr")
library(pander) # for tables
library(brms) # for bayesian models
library(ggtext) # for markdown syntax in ggplot
library(MetBrewer) # for more colour palettes
library(MoMAColors) # nicer colours once again
library(PNWColors) # further nicer colours
library(patchwork) # building composite plots
library(DT) # for nice tables
library(kableExtra) # for more nice tables
library(ggrepel) # for labelling ggplots
library(pheatmap) # for heat maps
library(MASS) # needed for CPASSOC
library(Matrix) # needed for CPASSOC

# build a helper function that produces a table to display our data

# Create a function to build HTML searchable tables

my_data_table <- function(df){
  datatable(
    df, rownames=FALSE,
    autoHideNavigation = TRUE,
    extensions = c("Scroller",  "Buttons"),
    options = list(
      autoWidth = TRUE,
      dom = 'Bfrtip',
      deferRender=TRUE,
      scrollX=TRUE, scrollY=1000,
      scrollCollapse=TRUE,
      buttons =
        list('pageLength', 'colvis', 'csv', list(
          extend = 'pdf',
          pageSize = 'A4',
          orientation = 'landscape',
          filename = 'Lifespan_data')),
      pageLength = 100
    )
  )
}

Load variant/gene annotations

DGRP variant annotations were downloaded from the DGRP website and gene annotations for all the genes covered by DGRP variants, from the org.Dm.eg.db database object from Bioconductor.

These will be useful later when we aim to identify whether variants with notable associations with a trait overlap with any genes.

Show the code
# Helper function to split a vector into chunks 
chunker <- function(x, max_chunk_size) split(x, ceiling(seq_along(x) / max_chunk_size))

if(!file.exists("data/derived/annotations.csv")){
  
  # Load annotation file, get important info
  
  annot <- read.table("data/input/dgrp.fb557.annot.txt", header = FALSE, stringsAsFactors = FALSE)
  
  get.info <- function(rows){
    lapply(rows, function(row){
      site.class.field <- strsplit(annot$V3[row], split = "]")[[1]][1]
      num.genes <- str_count(site.class.field, ";") + 1
      output <- cbind(rep(annot$V1[row], num.genes), 
                      do.call("rbind", lapply(strsplit(site.class.field, split = ";")[[1]], 
                                              function(x) strsplit(x, split = "[|]")[[1]])))
      if(ncol(output) == 5) return(output[,c(1,2,4,5)]) # only return SNPs that have some annotation. Don't get the gene symbol
      else return(NULL)
    }) %>% do.call("rbind", .)
  }
  
  variant.details <- lapply(chunker(1:nrow(annot), max_chunk_size = 10000), get.info) %>% 
    do.call("rbind", .) %>% as.data.frame()
  
  names(variant.details) <- c("SNP", "FBID", "site.class", "distance.to.gene")
  variant.details$FBID <- unlist(str_extract_all(variant.details$FBID, "FBgn[:digit:]+")) # clean up text strings for Flybase ID
  variant.details %>%
    dplyr::filter(site.class != "FBgn0003638") %>% # NB this is a bug in the DGRP's annotation file
    mutate(chr = str_remove_all(substr(SNP, 1, 2), "_")) # get chromosome now for faster sorting later
  
  annotations <- variant.details
} else annotations <- read_csv("data/derived/annotations.csv")

annotations <-
  annotations %>% 
  left_join(read.csv("data/Input/all_dmel_genes.csv")) %>% 
  dplyr::select(SNP, FBID, site.class, distance.to.gene, gene_name, chromosome)

Loading data used in GWA tests

In the demographic component of this study, we calculated mean values and standard error for each combination of line, sex, study, temperature and mating status. These data are displayed, and can be downloaded from, the below table. Note that for GWA and other SNP based analysis, we removed lines that had not been genotyped (shown as Genotyped = NO). Lines with unknown genotypes also have unknown Wolbachia and inversions status’. Durham et al (2014) cleared all lines of Wolbachia via treatment with tetracycline.

Show the code
genotyped_lines <- 
  read_csv("data/input/Genotyped_lines.csv") %>% 
  mutate(Genotyped = "YES",
         line = as.factor(line))
  
full_dataset <- 
  read.csv("data/input/lifespan_data.csv") %>% 
  as_tibble() %>% 
  mutate(line = as.factor(Line),
         Treatment = str_replace(Treatment, " ", "_")) %>%
  dplyr::select(-Line) %>% 
  left_join(genotyped_lines, by = "line") %>% 
  mutate(Genotyped = if_else(is.na(Genotyped), "NO", Genotyped)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Study, Treatment, Block, e0, SE_e0, h, SE_h, samp, Genotyped)

# DGRP studies often correct for the most common inversions and wolbachia presence. 

inversions_wolbachia <- 
  read_csv("data/Input/inversions_wolbachia.csv") %>%
  mutate(line = as.factor(str_remove(line, "DGRP_")),
         Wolbachia = if_else(Wolbachia == "y", 1, 0),
         across(2:17, ~ case_when(.x == "ST" ~ 0,
                                 .x == "INV/ST" ~ 1,
                                 .x == "INV" ~ 2))) %>% 
  dplyr::select(line, `In(2L)t`, `In(2R)NS`, `In(3R)P`, `In(3R)K`, `In(3R)Mo`, Wolbachia) %>% 
  rename(In_2L_t = `In(2L)t`,
         In_2R_NS = `In(2R)NS`,
         In_3R_P = `In(3R)P`,
         In_3R_K = `In(3R)K`,
         In_3R_Mo = `In(3R)Mo`)
# inversions pruned to those Huang et al 2015 PNAS corrected for

full_dataset <- 
  full_dataset %>% 
  left_join(inversions_wolbachia) %>% 
  mutate(Wolbachia = if_else(Study == "Durham_2014", 0, Wolbachia)) # study cleared wolbachia with tetracycline before phenotyping 
  
my_data_table(full_dataset %>% 
                mutate(across(8:11, ~ round(.x, 2))) %>% 
                dplyr::select(1:13))

For GWAS and later CPASSOC, we split the data by study, removed studies that phenotyped < 100 lines and adjusted line means to account for experimental block where applicable. Importantly, we also split the Wilson et al (2020) data by dietary treatment; while we do not explicitly consider diet in our analysis, lifespan in one dietary treatment is considered a separate trait from lifespan measured in a second dietary treatment.

Show the code
Arya_2010_f <-
  full_dataset %>% 
  filter(Study == "Arya_2010" & Sex == "Female" & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Arya_2010_m <-
  full_dataset %>% 
  filter(Study == "Arya_2010" & Sex == "Male" & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Huang_2020_f_18 <-
  full_dataset %>% 
  filter(Study == "Huang_2020" & Sex == "Female" & Temperature == 18 & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Huang_2020_m_18 <-
  full_dataset %>% 
  filter(Study == "Huang_2020" & Sex == "Male" & Temperature == 18 & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Huang_2020_f_25 <-
  full_dataset %>% 
  filter(Study == "Huang_2020" & Sex == "Female" & Temperature == 25 & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Huang_2020_m_25 <-
  full_dataset %>% 
  filter(Study == "Huang_2020" & Sex == "Male" & Temperature == 25 & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Huang_2020_f_28 <-
  full_dataset %>% 
  filter(Study == "Huang_2020" & Sex == "Female" & Temperature == 28 & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Huang_2020_m_28 <-
  full_dataset %>% 
  filter(Study == "Huang_2020" & Sex == "Male" & Temperature == 28 & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

# In this study, some lines were measured twice per treatment, and a small subset were measured three times. We take the mean across blocks as the line mean, following the original study.

Wilson_2020_f_1 <-
  full_dataset %>% 
  filter(Treatment == "Wilson_2020_1" & Genotyped == "YES") %>%
  group_by(line) %>% 
  mutate(e0 = mean(e0),
         h = mean(h)) %>% 
  ungroup() %>% 
  distinct(line, .keep_all = TRUE) %>%
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Wilson_2020_f_2 <-
  full_dataset %>% 
  filter(Treatment == "Wilson_2020_2" & Genotyped == "YES") %>% 
  group_by(line) %>% 
  mutate(e0 = mean(e0),
         h = mean(h)) %>% 
  ungroup() %>% 
  distinct(line, .keep_all = TRUE) %>%
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Durham_2014_f <-
  full_dataset %>% 
  filter(Study == "Durham_2014" & Genotyped == "YES") %>% 
  mutate(Block = as.factor(Block))

# statistically account for the effect of block

Durham_model_e0 <-
  brm(e0 ~ 1 + Block + (1|line),
      data = Durham_2014_f, family = gaussian,
      prior = c(prior(normal(50, 15), class = Intercept),
                prior(normal(0, 10), class = b),
                prior(exponential(1), class = sigma)),
      chains = 4, cores = 4, iter = 6000, warmup = 2000,
      seed = 1, file = "fits/Durham_model_e0")

Durham_model_h <-
  brm(h ~ 1 + Block + (1|line),
      data = Durham_2014_f, family = gaussian,
      prior = c(prior(exponential(1), class = Intercept),
                prior(normal(0, 1), class = b),
                prior(exponential(1), class = sigma)),
      chains = 4, cores = 4, iter = 6000, warmup = 2000,
      seed = 1, file = "fits/Durham_model_h")

Durham_2014_f_adjusted <- 
  as_draws_df(Durham_model_e0) %>% 
  dplyr::select(b_Intercept, contains("r_line")) %>% 
  # mutate each draw to show the lines standardised lifespan in Block 1 (denoted b_Intercept in brms output)
  mutate(across(2:177, ~ .x + b_Intercept)) %>%  
  dplyr::select(-b_Intercept) %>% 
  pivot_longer(cols = everything(), names_to = "line", values_to = "e0") %>% 
  group_by(line) %>% 
  summarise(e0 = mean(e0)) %>% 
  mutate(line = str_remove(line, fixed("r_line[")),
         line = str_remove(line, fixed(",Intercept]"))) %>% 
  left_join(
    as_draws_df(Durham_model_h) %>% 
      dplyr::select(b_Intercept, contains("r_line")) %>% 
      # mutate each draw to show the lines standardised lifespan in Block 1 (denoted b_Intercept in brms output)
      mutate(across(2:177, ~ .x + b_Intercept)) %>%  
      dplyr::select(-b_Intercept) %>% 
      pivot_longer(cols = everything(), names_to = "line", values_to = "h") %>% 
      group_by(line) %>% 
      summarise(h = mean(h)) %>% 
      mutate(line = str_remove(line, fixed("r_line[")),
             line = str_remove(line, fixed(",Intercept]")))
  ) %>% 
  left_join(Durham_2014_f %>% 
              distinct(line, .keep_all = T) %>% 
              dplyr::select(line, Sex, Temperature, Mated, Treatment)) %>% 
    mutate(e0_scaled = scale(e0),
         h_scaled = scale(h))

Patel_2021_f <-
  full_dataset %>% 
  filter(Study == "Patel_2021" & Genotyped == "YES") %>% 
  mutate(e0_scaled = scale(e0),
         h_scaled = scale(h)) %>% 
  dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)

Preparing for univariate GWAS

The preparation of data for GWAS generally follows Holman and Wong’s DGRP GWAS of fitness in different contexts. See their associated workflowr report for a comprehensive breakdown of their analysis.

Install neccessary software and build helper functions

In addition to the R packages we load, plink 1.9 and beagle must also be installed. These software packages allow us to wrangle the data into the correct format and impute missing values, both of which are required to run GWAS with the Gemma R package (I’m still using PLINK for GWAS right now).

Show the code
# build functions to prepare data for GWAS

prep_for_e0_GWAS <- function(data, sex){
data %>% 
  mutate(line = glue("line{line}")) %>% 
  dplyr::select(line, e0_scaled)
}

prep_for_h_GWAS <- function(data, sex){
data %>% 
  #inner_join(read_csv("data/Input/Genotyped_lines.csv"), by = "line") %>%  # filter to lines that have been genotyped
  mutate(line = glue("line{line}")) %>% 
  dplyr::select(line, h) %>% 
    mutate(h = scale(h))
}

prep_for_SA_GWAS <- function(data){
data %>% 
  dplyr::select(line, SA_axis)
}

prep_for_SC_GWAS <- function(data){
data %>% 
  dplyr::select(line, SC_axis)
}

prep_for_ageing_GWAS <- function(data){
  data %>%
    mutate(line = glue("line{line}")) %>% 
    dplyr::select(line, ageing_axis)
}

prep_for_scaling_GWAS <- function(data){
  data %>%
    mutate(line = glue("line{line}")) %>% 
    dplyr::select(line, scaling_axis)
}

# I used bigsnpr::download_plink(dir = "code/windows") which puts it in the code folder - I'm using a windows operating system. The macOS version can also be downloaded into "code/macOS" 

# Beagle is a software package for phasing genotypes and imputing ungenotyped markers.

plink <- paste(getwd(), "code/windows/plink", sep = "/") #windows
#plink <- paste(getwd(), "code/macOS/plink", sep = "/") #macOS
#beagle <- bigsnpr::download_beagle(
#    dir = "/Users/tkeaney/Library/CloudStorage/OneDrive-JGU/Postdoc/DGRP_lifespan/DGRP_lifespan_inequality/code/macOS") 
# only need to download this once - change path depending on operating system


# helper function to pass commands to the terminal
# Note that we set `intern = TRUE`, and pass the result of `system()` to `cat()`, 
# ensuring that the Terminal output will be printed in this knitr report.
# 
# This is the mac OS function
 
#run_command_mac <- function(shell_command, wd = getwd(), path = ""){
 # cat(system(glue("cd ", wd, path, # tell terminal which directory to work in
  #                "\n",shell_command), # on a second terminal line, run the plink command
   #          intern = TRUE), sep = '\n')  
#}

# This is the windows function 

run_command_windows <- function(plink_command, wd = getwd(), path = "") {
  # Specify the full path to the plink executable within the 'code' subdirectory.
  plink_path <- paste(getwd(), "code/windows/plink", sep = "/")
  
  # Create the full shell command with the plink executable.
  command <- glue("cmd.exe /c cd /d {shQuote(file.path(wd, path))} && {shQuote(plink_path)} {plink_command}")
  
  # Execute the combined command.
  result <- system(command, intern = TRUE)
  
  # Print the result.
  cat(result, sep = '\n')
  
  # Return the result as a character vector.
  return(result)
}

# sometimes we need to run terminal commands without plink - create a slightly different function to do this

run_command_no_plink <- function(shell_command, wd = getwd(), path = "") {
  
  # Create the full shell command with the plink executable.
  command <- glue("cmd.exe /c cd /d {shQuote(file.path(wd, path))} && {shell_command}")
  
  # Execute the combined command.
  result <- system(command, intern = TRUE)
  
  # Print the result.
  cat(result, sep = '\n')
  
  # Return the result as a character vector.
  return(result)
}

Perform SNP quality control and impute missing data

Plink recognises three types of files that are necessary for GWA analysis: the .bed, .bim and .fam files.

.bed: the binary biallelic genotype table. Four options are possible:

  • 00 = homozygous for minor allele
  • 01 = missing genotype
  • 10 = heterozygous genotype
  • 11 = homozygous for major allele

The overwhelming majority of genotypes in the DGRP are homozygous for one of the alleles.

.bim: extended variant information file accompanying the .bed file. It has six fields:

  1. chromosome code

  2. variant identifier

  3. position in morgans

  4. base-pair coordinate

  5. Minor allele

  6. Major allele

.fam: Plink sample information file. It can have the following elements:

  1. Family ID (‘FID’) (in our case just the DGRP line)

  2. Within-family ID (‘IID’; cannot be ‘0’) (in our case just the DGRP line)

  3. Within-family ID of father (‘0’ if father isn’t in dataset)

  4. Within-family ID of mother (‘0’ if mother isn’t in dataset)

  5. Sex code (‘1’ = male, ‘2’ = female, ‘0’ = unknown) - not important for us because we analyse the sexes separately.

  6. Phenotype value (‘1’ = control, ‘2’ = case, ‘-9’/‘0’/non-numeric = missing data) - 9 for us because we supply the phenotypic data later

We cleaned up the DGRP’s .bed/.bim/.fam files (available from the Mackay lab website) by:

  1. Removing any SNPs for which genotypes are missing for >10% of the 205 DGRP lines. We then use the software Beagle to impute the remaining missing genotypes. Imputation takes a long time so ideally, you only want to do it once.

  2. Removing SNPs with a minor allele frequency of less than 5% across the 205 lines. We have negligible power to detect associations for rare SNPs below this threshold.

In the Plink-formatted genotype files, lines fixed for the major allele are coded as 2, and lines fixed for the minor allele as 0. This means that in the association tests we calculate, negative effect sizes mean that the minor allele is associated with lower trait values, while positive effect sizes means that the minor allele is associated with higher trait values.

Show the code
Run_function <- FALSE # Change this to TRUE to run - read through the code before you do this 

if(Run_function){
  
  # Use Plink to clean and subset the DGRP's SNP data as follows:
  # Only keep SNPs for which at least 90% of the 205 DGRP lines were successfully genotyped (--geno 0.1)
  # Only keep SNPs with a minor allele frequency of 0.05 or higher (--maf 0.05) across the 205 lines
  # Write the processed BIM/BED/FAM files to the data/Derived/plink_output directory
  
  output_directory <-  paste(getwd(), "data/Derived/plink_output", sep = "/")
  
  run_command_windows(glue("--bfile dgrp2",
                   " --geno 0.1 --maf 0.05 --allow-no-sex", 
                   " --make-bed --out {shQuote(output_directory)}/dgrp2_QC_all_lines"), path = "data/Input/bfiles/")
  
  # Use the shell command 'sed' to remove underscores from the DGRP line names in the .fam file (e.g. 'line_120' becomes 'line120')
  # Otherwise, these underscores cause trouble when we need to convert from PLINK to vcf format (vcf format uses underscore as a separator)
  # this works on mac but not on windows
  for(i in 1:2) run_command_no_plink("sed -i '' 's/_//' dgrp2_QC_all_lines.fam", path = "/data/Derived/")
                
  
  # Now impute the missing genotypes using Beagle
  # This part uses the data for the full DGRP panel of >200 lines, to infer missing genotypes as accurately as possible. 
  # The bigsnpr package provides a helpful wrapper for Beagle called snp_beagleImpute(): it translates to a VCF file and back again using PLINK
  # Imputation with the below optimisation took about an hour on my computer, which is not especially powerful
  snp_beagleImpute(beagle, plink, 
                   bedfile.in = "data/Derived/plink_output/dgrp2_QC_all_lines.bed", 
                   bedfile.out = "data/Derived/plink_output/dgrp2_QC_all_lines_imputed.bed",
                   ncores = 4, 
                   memory.max = 16)
  
  # assign a sex of 'female' to all the DGRP lines (Beagle removes the sex, and it seems PLINK needs individuals to have a sex, 
  # despite PLINK having a command called --allow-no-sex)
  run_command_windows("sed -i '' 's/    0   0   0/  0   0   2/' dgrp2_QC_all_lines_imputed.fam", path = "/data/Derived/plink_output/")
  
  # Re-write the .bed file, to make sure the MAF threshold and minor/major allele designations are correctly assigned post-Beagle
  run_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed",
                   " --geno 0.1 --maf 0.05 --allow-no-sex", 
                   " --make-bed --out dgrp2_QC_all_lines_imputed_correct"), path = "/data/Derived/plink_output/")
  #unlink(list.files("data/derived", pattern = "~", full.names = TRUE)) # delete the original files, which were given a ~ file name by PLINK
} else "Already run"
[1] "Already run"

Build GWAS function

Show the code
run_GWAS <- function(phenotypes){
  
  # Make a list of the lines in our sample and save as a text file for passing to PLINK
  lines_to_keep <- phenotypes %>% dplyr::select(line) %>% mutate(line_2 = line)
  write.table(lines_to_keep, 
              row.names = FALSE, 
              col.names = FALSE, 
              file = "data/Derived/plink_output/lines_to_keep.txt", 
              quote = FALSE)

  # Now cull the PLINK files to just the lines that we measured, and re-apply the 
  # MAF cut-off of 0.05 for the new smaller sample of DGRP lines
  run_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed_correct",
                   " --keep-allele-order", # force PLINK to retain the major/minor allele designations that apply to the DGRP as a whole
                   " --keep lines_to_keep.txt --geno 0.1 --maf 0.05", 
                   " --make-bed --out dgrp2_QC_focal_lines"), path = "/data/Derived/plink_output/")
  
    # Define a function to add our phenotype data to a .fam file, which is needed for GWAS analysis and to make sure PLINK includes these samples
  # The 'phenotypes' data frame needs to have a column called 'line'
  add_phenotypes_to_fam <- function(filepath, phenotypes){
    read_delim(filepath, col_names = FALSE, delim = " ") %>% 
      dplyr::select(X1, X2, X3, X4, X5) %>% # Get all the non-phenotype columns
      left_join(phenotypes, 
                by = c("X1" = "line")) %>%
      write.table(file = "data/Derived/plink_output/dgrp2_QC_focal_lines_NEW.fam", 
                  col.names = FALSE, row.names = FALSE, 
                  quote = FALSE, sep = " ")
    
    unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.fam")
    file.rename("data/Derived/plink_output/dgrp2_QC_focal_lines_NEW.fam", 
                "data/Derived/plink_output/dgrp2_QC_focal_lines.fam")
  }
  
  # edit the new FAM file to add the phenotype data from 'phenotypes'
  add_phenotypes_to_fam("data/Derived/plink_output/dgrp2_QC_focal_lines.fam", phenotypes)
  
  # Run basic GWAS 
  
  run_command_windows("--bfile dgrp2_QC_focal_lines  --assoc --maf 0.05 --allow-no-sex", 
              path = "/data/Derived/plink_output")
  
  # wrangle the GWAS results
  
  Focal_name <- deparse(substitute(phenotypes))
  
  gwas_results <- read.table("data/Derived/plink_output/plink.qassoc", 
                             header = TRUE) %>% 
    dplyr::select(SNP, BETA, SE, "T", P)

  # Rename and compress the GWAS summary stats file 
  # The filter step means that only variants in the LD-pruned subset get saved to disk.
  gwas_results %>% 
  #  filter(SNP %in% (pull(read_tsv("data/Derived/plink_output/dgrp2_QC_all_lines_imputed_correct_pruned.prune.in", col_names = "SNP"), SNP))) %>% 
    write_tsv(glue("data/Derived/GWAS_results/{Focal_name}.tsv.gz"))
  unlink("data/Derived/plink_output/plink.qassoc")
  
  # Rename the plink log file
  file.rename("data/Derived/plink_output/plink.log",
              glue("data/Derived/plink_output/{Focal_name}_log.txt"))
  
  unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.bim")
  unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.bed")
  unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.fam")
  unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.log")
} 

Build a function to create manhattan plots for later

Show the code
build_manhattan_plot <- function(gwas_results){
  
  manhattan_data <- gwas_results %>%
    mutate(position = str_split(SNP, "_"), # split the SNP name into logical bits
           chr = map_chr(position, ~ .x[1]), # the first bit is the chromosome arm - name the column appropriately
           position = as.numeric(map_chr(position, ~ .x[2])), # where on the chromosome is the SNP found
           pval = -1 * log10(P)) %>% # make visualising the p values easier
    dplyr::select(chr, position, pval) %>% 
    filter(chr != "4")
  
  # this next chunk finds convenient cuts for labels and colour changes 
  
  max_pos <- manhattan_data %>%
    group_by(chr) %>%
    summarise(
      max_pos = max(position), 
      middle_pos = (max_pos - min(position)) / 2,
      .groups = "drop") %>%
    as.data.frame()
  
  max_pos$max_pos <- c(0, cumsum(max_pos$max_pos[1:4]))
  
  max_pos$label_pos <- max_pos$max_pos + max_pos$middle_pos
  
  # combine the two dataframes created above
  
  manhattan_data <- manhattan_data %>%
    left_join(max_pos, by = "chr") %>%
    mutate(position = position + max_pos,
           chromosome_colour = case_when(chr == "2L" | chr == "3L" | chr == "X" ~ "A",
                                         .default = "B"),
           Notable = if_else(pval >= -log10(1e-08), "YES", "NO"))
  
  plot <- manhattan_data %>%
    ggplot(aes(position, pval, group = chr, stroke = 0.01)) +
    geom_point(aes(colour = chromosome_colour), size = 1.6) +
    geom_hline(yintercept = -log10(1e-08), linetype = 2, colour = "red", linewidth = 1.2) +
    geom_hline(yintercept = -log10(1e-05), linetype = 2, colour = "orange", linewidth = 1.2) +
    scale_colour_manual(values = c(met.brewer(name = "Hokusai3")[3], met.brewer(name = "Hokusai3")[6])) +
    scale_x_continuous(breaks = max_pos$label_pos, labels = max_pos$chr) +
    scale_y_continuous(expand = c(0,0)) +
    ylab("-log~10~(_p_)") + 
    xlab("Chromosome and position") +
    theme_classic() +
    theme(legend.position = "none",
          axis.title.y = element_markdown(size = 18),
          axis.title.x = element_markdown(size = 18),
          axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15))  
}

Cross phenotype meta-analysis

The power to detect variants associated with correlated phenotypes can be increased if a meta-analytic approach is adopted (Zhu et al, 2018). Here, we use the CPASSOC approach developed by Zhou et al (2015), which evaluates the null hypothesis that SNPs are associated with none of the considered traits, weighted by the sample size of each study and adjusted for the trait correlation matrix. The steps to apply CPASSOC are as follows:

  1. Estimate \(R\), the trait correlation matrix. In the DGRP, this can be done using SNP data or using line means.

  2. GWAS each trait separately (see above).

  3. Collate effect sizes for each trait into a vector \(\beta\), for each SNP.

  4. Use a Wald test statistic to estimate a vector of p-values, \(T\), from the \(\beta\) and \(\sigma\) (which is approximated from the study sample size) estimates for each SNP.

  5. Test whether \(\beta\) = 0. If the trait data are homogeneous (SNPs are expected to affect all traits in the same direction), use:

\[S_{Hom} = \frac{e^T(RW)^{-1}T(e^T(RW)^{-1}T)^T}{e^T(WRW)^{-1}e}\] where \(W\) is a diagonal matrix of weights for the individual test statistics (either the inverse of the variance or simply the sample size).

  1. If there is heterogeneity between trait measures (i.e. it is a reasonable expectation that SNPs could affect some traits in one direction and others in the opposing direction), \(S_{Hom}\) is not appropriate. The ideal test statistic in this case would only include the cohorts and traits with a true contribution to the association of a genetic variant. To implement this, the value \(\tau\) is used as a threshold, below which traits are not included in the test statistic. This statistic, \(S_{Het}\) can be viewed as the maximum of the weighted sum of trait-specific test statistics satisfying different thresholds. To calculate \(S_{Het}\) first find,

\[S_{\tau} = \frac{e^T(R(\tau)W(\tau))^{-1}T(\tau)(e^T(R(\tau)W(\tau))^{-1}T(\tau))^T}{e^TW(\tau)^{-1}R(\tau)^{-1}W(\tau)^{-1}e}\] When \(\tau\) is large, \(S(\tau)\) can be undefined if the test statistic falls below \(\tau\) for all cohorts. In this case \(S(\tau) = 0\). Our test statistic is then

\[\displaystyle S_{Het} = \max_{\tau \gt 0} S(\tau)\] To generate p-values, \(S_{Het}\) is then compared with a gamma distribution of test-statistics.

Anyway, I’ve only included that for the mathematically inclined. Code to implement both statistics in R can be downloaded here, and is implemented below.

The functions

These are directly loaded from here

Show the code
require(compiler)
enableJIT(4)
[1] 3
Show the code
Non_Trucated_TestScore <- function(X, SampleSize, CorrMatrix)
{
  Wi = matrix(SampleSize, nrow = 1);
  sumW = sqrt(sum(Wi^2));
  W = Wi / sumW;
  
  Sigma = ginv(CorrMatrix);
  XX = apply(X, 1, function(x) {
    x1 <- matrix(x, ncol = length(x), nrow = 1);
    T = W %*% Sigma %*% t(x1);
    T = (T*T) / (W %*% Sigma %*% t(W));
    return(T[1,1]);
  }
  );
  return(XX);
}
SHom <- cmpfun(Non_Trucated_TestScore);

Trucated_TestScore <- function(X, SampleSize, CorrMatrix, correct = 1, startCutoff = 0, endCutoff = 1, CutoffStep = 0.05, isAllpossible = T)
{
  N = dim(X)[2];
  
  Wi = matrix(SampleSize, nrow = 1);
  sumW = sqrt(sum(Wi^2));
  W = Wi / sumW;
  
  XX = apply(X, 1, function(x) {
    TTT = -1;
    
    if (isAllpossible == T ) {
      cutoff = sort(unique(abs(x)));      ## it will filter out any of them.
    } else {
      cutoff = seq(startCutoff, endCutoff, CutoffStep);
    }
    
    
    for (threshold in cutoff) {
      x1 = x;
      index = which(abs(x1) < threshold);
      
      if (length(index) == N) break;
      
      A = CorrMatrix;
      
      W1 = W;
      if (length(index) !=0 ) {
        x1 = x1[-index];
        A  = A[-index, -index];   ## update the matrix
        W1 = W[-index];
      }
      
      if (correct == 1)
      {
        index = which(x1 < 0);
        if (length(index) != 0) {
          W1[index] = -W1[index];    ## update the sign
        }
      }
      
      A = ginv(A);
      x1 = matrix(x1, nrow = 1);
      W1 = matrix(W1, nrow = 1);
      T = W1 %*% A %*% t(x1);
      T = (T*T) / (W1 %*% A %*% t(W1));
      
      if (TTT < T[1,1]) TTT = T[1,1];
    }
    return(TTT);
  }
  );
  return(XX);
}
SHet <- cmpfun(Trucated_TestScore);

EstimateGamma <- function (N = 1E6, SampleSize, CorrMatrix, correct = 1, startCutoff = 0, endCutoff = 1, CutoffStep = 0.05, isAllpossible = T) {
  
  Wi = matrix(SampleSize, nrow = 1);
  sumW = sqrt(sum(Wi^2));
  W = Wi / sumW;
  
  Permutation = mvrnorm(n = N, mu = c(rep(0, length(SampleSize))), Sigma = CorrMatrix, tol = 1e-8, empirical = F);
  
  Stat =  Trucated_TestScore(X = Permutation, SampleSize = SampleSize, CorrMatrix = CorrMatrix,
                             correct = correct, startCutoff = startCutoff, endCutoff = endCutoff,
                             CutoffStep = CutoffStep, isAllpossible = isAllpossible);
  a = min(Stat)*3/4
  ex3 = mean(Stat*Stat*Stat)
  V =   var(Stat);
  
  for (i in 1:100){
    E = mean(Stat)-a;
    k = E^2/V
    theta = V/E
    a = (-3*k*(k+1)*theta**2+sqrt(9*k**2*(k+1)**2*theta**4-12*k*theta*(k*(k+1)*(k+2)*theta**3-ex3)))/6/k/theta
  }
  
  para = c(k,theta,a);
  return(para);
}

EmpDist <- function (N = 1E6, SampleSize, CorrMatrix, correct = 1, startCutoff = 0, endCutoff = 1, CutoffStep = 0.05, isAllpossible = T) {
  
  Wi = matrix(SampleSize, nrow = 1);
  sumW = sqrt(sum(Wi^2));
  W = Wi / sumW;
  
  Permutation = mvrnorm(n = N, mu = c(rep(0, length(SampleSize))), Sigma = CorrMatrix, tol = 1e-8, empirical = F);
  
  Stat =  Trucated_TestScore(X = Permutation, SampleSize = SampleSize, CorrMatrix = CorrMatrix, correct = correct, startCutoff = startCutoff, endCutoff = endCutoff, CutoffStep = CutoffStep, isAllpossible = isAllpossible);
  
  return(Stat);
}

Analysing life expectancy and lifespan equality

Univariate GWAS

Show the code
# prepare phenotype data for GWAS

Arya_f_l <- prep_for_e0_GWAS(Arya_2010_f)
Arya_m_l <- prep_for_e0_GWAS(Arya_2010_m)
Arya_f_h <- prep_for_h_GWAS(Arya_2010_f)
Arya_m_h <- prep_for_h_GWAS(Arya_2010_m)
Huang_f_18_l <- prep_for_e0_GWAS(Huang_2020_f_18)
Huang_f_18_h <- prep_for_h_GWAS(Huang_2020_f_18)
Huang_m_18_l <- prep_for_e0_GWAS(Huang_2020_m_18)
Huang_m_18_h <- prep_for_h_GWAS(Huang_2020_m_18)
Huang_f_25_l <- prep_for_e0_GWAS(Huang_2020_f_25)
Huang_f_25_h <- prep_for_h_GWAS(Huang_2020_f_25)
Huang_m_25_l <- prep_for_e0_GWAS(Huang_2020_m_25)
Huang_m_25_h <- prep_for_h_GWAS(Huang_2020_m_25)
Huang_f_28_l <- prep_for_e0_GWAS(Huang_2020_f_28)
Huang_f_28_h <- prep_for_h_GWAS(Huang_2020_f_28)
Huang_m_28_l <- prep_for_e0_GWAS(Huang_2020_m_28)
Huang_m_28_h <- prep_for_h_GWAS(Huang_2020_m_28)
Wilson_f_l_1 <- prep_for_e0_GWAS(Wilson_2020_f_1)
Wilson_f_h_1 <- prep_for_h_GWAS(Wilson_2020_f_1)
Wilson_f_l_2 <- prep_for_e0_GWAS(Wilson_2020_f_2)
Wilson_f_h_2 <- prep_for_h_GWAS(Wilson_2020_f_2)
Durham_f_l <- prep_for_e0_GWAS(Durham_2014_f_adjusted)
Durham_f_h <- prep_for_h_GWAS(Durham_2014_f_adjusted)
Patel_f_l <- prep_for_e0_GWAS(Patel_2021_f)
Patel_f_h <- prep_for_h_GWAS(Patel_2021_f)

# if not already done, run the GWA tests

if(!file.exists("data/Derived/GWAS_results/Arya_f_l.tsv.gz")) {
run_GWAS(Arya_f_l)
run_GWAS(Arya_m_l)
run_GWAS(Arya_f_h)
run_GWAS(Arya_m_h)
run_GWAS(Huang_f_18_l)
run_GWAS(Huang_f_18_h)
run_GWAS(Huang_m_18_l)
run_GWAS(Huang_m_18_h)
run_GWAS(Huang_f_25_l)
run_GWAS(Huang_f_25_h)
run_GWAS(Huang_m_25_l)
run_GWAS(Huang_m_25_h)
run_GWAS(Huang_f_28_l)
run_GWAS(Huang_f_28_h)
run_GWAS(Huang_m_28_l)
run_GWAS(Huang_m_28_h)
run_GWAS(Wilson_f_l_1)
run_GWAS(Wilson_f_h_1)
run_GWAS(Wilson_f_l_2)
run_GWAS(Wilson_f_h_2)
run_GWAS(Durham_f_l)
run_GWAS(Durham_f_h)
run_GWAS(Patel_f_l)
run_GWAS(Patel_f_h)
}

As a point of comparison, we find the sum of significant associations detected by univariate GWAS

Show the code
# load GWAS results

# Life expectancy

Arya_f_l_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_f_l.tsv.gz") 
  
Huang_f_18_l_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_18_l.tsv.gz")

Huang_f_25_l_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_25_l.tsv.gz") 

Huang_f_28_l_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_28_l.tsv.gz")

Wilson_f_l_1_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_l_1.tsv.gz") 

Wilson_f_l_2_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_l_2.tsv.gz") 

Durham_f_l_GWAS <- read_tsv("data/Derived/GWAS_results/Durham_f_l.tsv.gz")

Patel_f_l_GWAS <- read_tsv("data/Derived/GWAS_results/Patel_f_l.tsv.gz")

Arya_m_l_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_m_l.tsv.gz")

Huang_m_18_l_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_18_l.tsv.gz")

Huang_m_25_l_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_25_l.tsv.gz")

Huang_m_28_l_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_28_l.tsv.gz")

# Lifespan equality

Arya_f_h_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_f_h.tsv.gz")
  
Huang_f_18_h_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_18_h.tsv.gz")

Huang_f_25_h_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_25_h.tsv.gz") 

Huang_f_28_h_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_28_h.tsv.gz")

Wilson_f_h_1_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_h_1.tsv.gz")

Wilson_f_h_2_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_h_2.tsv.gz")

Durham_f_h_GWAS <- read_tsv("data/Derived/GWAS_results/Durham_f_h.tsv.gz")

Patel_f_h_GWAS <- read_tsv("data/Derived/GWAS_results/Patel_f_h.tsv.gz")

Arya_m_h_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_m_h.tsv.gz")

Huang_m_18_h_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_18_h.tsv.gz")

Huang_m_25_h_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_25_h.tsv.gz")

Huang_m_28_h_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_28_h.tsv.gz")

Table SX. Genotype to phenotype associations detected by univariate GWAS, for life expectancy. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.

Show the code
# filter down to sig associations
e0_table <-
  bind_rows(
    Arya_f_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Arya_f_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Arya et al 2010",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Virgin") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    
    Huang_f_18_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_f_18_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "18",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_f_25_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_f_25_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_f_28_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_f_28_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "28",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Wilson_f_l_1_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Wilson_f_l_1_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Wilson et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Wilson_f_l_2_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Wilson_f_l_2_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Wilson et al 2020*",
             Treatment = "2",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Durham_f_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Durham_f_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Durham et al 2014",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Patel_f_l_GWAS %>% 
      filter(P < 1e-05) %>%
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Patel_f_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Patel et al 2021",
             Treatment = "1",
             Sex = "Female",
             Temperature = "23",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Arya_m_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Arya_m_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Arya et al 2010",
             Treatment = "1",
             Sex = "Male",
             Temperature = "25",
             `Mating status` = "Virgin") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_m_18_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_m_18_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Male",
             Temperature = "18",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_m_25_l_GWAS %>% 
      filter(P < 1e-05) %>%
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_m_25_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Male",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_m_28_l_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_m_28_l_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Male",
             Temperature = "28",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)
  ) 

# how many unique variants have been detected?
p_05_SNPs <-
  bind_rows(
    
    Arya_f_l_GWAS %>% 
      filter(P < 1e-05),
    
    Arya_m_l_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_18_l_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_25_l_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_28_l_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_18_l_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_25_l_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_28_l_GWAS %>% 
      filter(P < 1e-05),
    
    Wilson_f_l_1_GWAS %>% 
      filter(P < 1e-05),
    
    Wilson_f_l_2_GWAS %>% 
      filter(P < 1e-05),
    
    Durham_f_l_GWAS %>% 
      filter(P < 1e-05),
    
    Patel_f_l_GWAS %>% 
      filter(P < 1e-05)
  ) %>% 
  distinct(SNP) %>% 
  nrow()


e0_table %>% 
  add_row(Study = "Totals",
          Sex = "",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = p_05_SNPs,
          `p < 1e-08 variants` = sum(e0_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Sex Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 Female 25 Virgin 33 0
Huang et al 2020 Female 18 Mated 16 0
Huang et al 2020 Female 25 Mated 42 0
Huang et al 2020 Female 28 Mated 34 0
Wilson et al 2020 Female 25 Mated 23 0
Wilson et al 2020* Female 25 Mated 10 0
Durham et al 2014 Female 25 Mated 45 0
Patel et al 2021 Female 23 Mated 95 0
Arya et al 2010 Male 25 Virgin 12 0
Huang et al 2020 Male 18 Mated 25 0
Huang et al 2020 Male 25 Mated 41 0
Huang et al 2020 Male 28 Mated 21 0
Totals 387 0

Table SX. Genotype to phenotype associations detected by univariate GWAS, for lifespan equality. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.

Show the code
# filter down to sig associations
h_table <-
  bind_rows(
    Arya_f_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Arya_f_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Arya et al 2010",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Virgin") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    
    Huang_f_18_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_f_18_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "18",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_f_25_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_f_25_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_f_28_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_f_28_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "28",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Wilson_f_h_1_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Wilson_f_h_1_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Wilson et al 2020",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Wilson_f_h_2_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Wilson_f_h_2_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Wilson et al 2020*",
             Treatment = "2",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Durham_f_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Durham_f_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Durham et al 2014",
             Treatment = "1",
             Sex = "Female",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Patel_f_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Patel_f_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Patel et al 2021",
             Treatment = "1",
             Sex = "Female",
             Temperature = "23",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Arya_m_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Arya_m_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Arya et al 2010",
             Treatment = "1",
             Sex = "Male",
             Temperature = "25",
             `Mating status` = "Virgin") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_m_18_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_m_18_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Male",
             Temperature = "18",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_m_25_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_m_25_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Male",
             Temperature = "25",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
    
    Huang_m_28_h_GWAS %>% 
      filter(P < 1e-05) %>% 
      transmute(`p < 1e-05 variants` = n(),
                `p < 1e-08 variants` = nrow(filter(Huang_m_28_h_GWAS, P < 1e-08))) %>%
      distinct() %>% 
      mutate(Study = "Huang et al 2020",
             Treatment = "1",
             Sex = "Male",
             Temperature = "28",
             `Mating status` = "Mated") %>% 
      dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)
  ) 

# how many unique variants have been detected?
p_05_SNPs_h <-
  bind_rows(
    
    Arya_f_h_GWAS %>% 
      filter(P < 1e-05),
    
    Arya_m_h_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_18_h_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_25_h_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_28_h_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_18_h_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_25_h_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_28_h_GWAS %>% 
      filter(P < 1e-05),
    
    Wilson_f_h_1_GWAS %>% 
      filter(P < 1e-05),
    
    Wilson_f_h_2_GWAS %>% 
      filter(P < 1e-05),
    
    Durham_f_h_GWAS %>% 
      filter(P < 1e-05),
    
    Patel_f_h_GWAS %>% 
      filter(P < 1e-05)
  ) %>% 
  distinct(SNP) %>% 
  nrow()


h_table %>% 
  add_row(Study = "Totals",
          Sex = "",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = p_05_SNPs_h,
          `p < 1e-08 variants` = sum(h_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Sex Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 Female 25 Virgin 10 0
Huang et al 2020 Female 18 Mated 4 0
Huang et al 2020 Female 25 Mated 15 0
Huang et al 2020 Female 28 Mated 42 0
Wilson et al 2020 Female 25 Mated 47 0
Wilson et al 2020* Female 25 Mated 48 0
Durham et al 2014 Female 25 Mated 7 0
Patel et al 2021 Female 23 Mated 12 0
Arya et al 2010 Male 25 Virgin 15 0
Huang et al 2020 Male 18 Mated 16 0
Huang et al 2020 Male 25 Mated 21 0
Huang et al 2020 Male 28 Mated 40 0
Totals 257 0

Applying cross-phenotype meta-analysis

Generate the genetic correlation matrix

We calculate the genetic correlations between traits using both the line mean and SNP effect size comparisons. We use the SNP correlations following Zhu et al (2015) and show these in the below plots. Note that while the two approaches produce results that have a strong positive correlation, there are small differences that might affect downstream analyses.

Show the code
# use the BETA coefficients to build the SNP correlation matrix

SNP_data_e0 <-
  bind_rows(
    Arya_f_l_GWAS %>% 
      mutate(Study = "Arya_2010", Sex = "Female", Temperature = 25),
    Huang_f_18_l_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Female", Temperature= 18),
    Huang_f_25_l_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Female", Temperature= 25),
    Huang_f_28_l_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Female", Temperature= 28),
    Wilson_f_l_1_GWAS %>% 
      mutate(Study= "Wilson_2020_1", Sex= "Female", Temperature= 25),
    Wilson_f_l_2_GWAS %>% 
      mutate(Study= "Wilson_2020_2", Sex= "Female", Temperature= 25),
    Durham_f_l_GWAS %>% 
      mutate(Study= "Durham_2014", Sex= "Female", Temperature= 25),
    Patel_f_l_GWAS %>% 
      mutate(Study= "Patel_2021", Sex= "Female", Temperature= 23),
    Arya_m_l_GWAS %>% 
      mutate(Study= "Arya_2010", Sex= "Male", Temperature= 25),
    Huang_m_18_l_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Male", Temperature= 18),
    Huang_m_25_l_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Male", Temperature = 25),
    Huang_m_28_l_GWAS %>% 
      mutate(Study = "Huang_2020", Sex = "Male", Temperature = 28)) %>% 
  dplyr::select(SNP, BETA, Study, Sex, Temperature) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Sex, Temperature)) %>% 
  dplyr::select(-SNP) %>% 
  rename(Arya_f_25 = Arya_2010_Female_25,
         Huang_f_18 = Huang_2020_Female_18,
         Huang_f_25 = Huang_2020_Female_25,
         Huang_f_28 = Huang_2020_Female_28,
         Wilson_f_25_1 = Wilson_2020_1_Female_25,
         Wilson_f_25_2 = Wilson_2020_2_Female_25,
         Durham_f_25 = Durham_2014_Female_25,
         Patel_f_23 = Patel_2021_Female_23,
         Arya_m_25 = Arya_2010_Male_25,
         Huang_m_18 = Huang_2020_Male_18,
         Huang_m_25 = Huang_2020_Male_25,
         Huang_m_28 = Huang_2020_Male_28)

SNP_data_h <-
  bind_rows(
    Arya_f_h_GWAS %>% 
      mutate(Study = "Arya_2010", Sex = "Female", Temperature = 25),
    Huang_f_18_h_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Female", Temperature= 18),
    Huang_f_25_h_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Female", Temperature= 25),
    Huang_f_28_h_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Female", Temperature= 28),
    Wilson_f_h_1_GWAS %>% 
      mutate(Study= "Wilson_2020_1", Sex= "Female", Temperature= 25),
    Wilson_f_h_2_GWAS %>% 
      mutate(Study= "Wilson_2020_2", Sex= "Female", Temperature= 25),
    Durham_f_h_GWAS %>% 
      mutate(Study= "Durham_2014", Sex= "Female", Temperature= 25),
    Patel_f_h_GWAS %>% 
      mutate(Study= "Patel_2021", Sex= "Female", Temperature= 23),
    Arya_m_h_GWAS %>% 
      mutate(Study= "Arya_2010", Sex= "Male", Temperature= 25),
    Huang_m_18_h_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Male", Temperature= 18),
    Huang_m_25_h_GWAS %>% 
      mutate(Study= "Huang_2020", Sex= "Male", Temperature = 25),
    Huang_m_28_h_GWAS %>% 
      mutate(Study = "Huang_2020", Sex = "Male", Temperature = 28)) %>% 
  dplyr::select(SNP, BETA, Study, Sex, Temperature) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Sex, Temperature)) %>% 
  dplyr::select(-SNP) %>%
  rename(Arya_f_25 = Arya_2010_Female_25,
         Huang_f_18 = Huang_2020_Female_18,
         Huang_f_25 = Huang_2020_Female_25,
         Huang_f_28 = Huang_2020_Female_28,
         Wilson_f_25_1 = Wilson_2020_1_Female_25,
         Wilson_f_25_2 = Wilson_2020_2_Female_25,
         Durham_f_25 = Durham_2014_Female_25,
         Patel_f_23 = Patel_2021_Female_23,
         Arya_m_25 = Arya_2010_Male_25,
         Huang_m_18 = Huang_2020_Male_18,
         Huang_m_25 = Huang_2020_Male_25,
         Huang_m_28 = Huang_2020_Male_28)
  

SNP_e0_corr_matrix <- cor(SNP_data_e0, use = "pairwise.complete.obs", method = "spearman")
SNP_h_corr_matrix <- cor(SNP_data_h, use = "pairwise.complete.obs", method = "spearman")


line_data <-
  bind_rows(Arya_2010_f,
            Huang_2020_f_18,
            Huang_2020_f_25,
            Huang_2020_f_28,
            Wilson_2020_f_1,
            Wilson_2020_f_2,
            Durham_2014_f_adjusted,
            Patel_2021_f,
            Arya_2010_m,
            Huang_2020_m_18,
            Huang_2020_m_25,
            Huang_2020_m_28) %>% 
  dplyr::select(line, Treatment, Sex, Temperature, e0, h) %>% 
  pivot_wider(values_from = c(e0, h), names_from = c(Treatment, Sex, Temperature)) 

line_data_e0 <-
  line_data %>% 
  dplyr::select(contains("e0")) %>% 
  rename(Arya_f_25 = e0_Arya_2010_1_Female_25,
         Huang_f_18 = e0_Huang_2020_1_Female_18,
         Huang_f_25 =  e0_Huang_2020_1_Female_25,
         Huang_f_28 = e0_Huang_2020_1_Female_28,
         Wilson_f_25_1 = e0_Wilson_2020_1_Female_25,
         Wilson_f_25_2 = e0_Wilson_2020_2_Female_25,
         Durham_f_25 = e0_Durham_2014_1_Female_25,
         Patel_f_23 = e0_Patel_2021_1_Female_23,
         Arya_m_25 = e0_Arya_2010_1_Male_25,
         Huang_m_18 = e0_Huang_2020_1_Male_18,
         Huang_m_25 = e0_Huang_2020_1_Male_25,
         Huang_m_28 = e0_Huang_2020_1_Male_28)

line_data_h <-
  line_data %>% 
  dplyr::select(!contains("e0"), -line) %>% 
  rename(Arya_f_25 = h_Arya_2010_1_Female_25,
         Huang_f_18 = h_Huang_2020_1_Female_18,
         Huang_f_25 =  h_Huang_2020_1_Female_25,
         Huang_f_28 = h_Huang_2020_1_Female_28,
         Wilson_f_25_1 = h_Wilson_2020_1_Female_25,
         Wilson_f_25_2 = h_Wilson_2020_2_Female_25,
         Durham_f_25 = h_Durham_2014_1_Female_25,
         Patel_f_23 = h_Patel_2021_1_Female_23,
         Arya_m_25 = h_Arya_2010_1_Male_25,
         Huang_m_18 = h_Huang_2020_1_Male_18,
         Huang_m_25 = h_Huang_2020_1_Male_25,
         Huang_m_28 = h_Huang_2020_1_Male_28)

line_e0_corr_matrix <- cor(line_data_e0, use = "pairwise.complete.obs", method = "spearman")
line_h_corr_matrix <- cor(line_data_h, use = "pairwise.complete.obs", method = "spearman")

Let’s visualise the genetic correlation between lifespan measures. First for life expectancy:

Show the code
breaksList <- seq(-1, 1, by = 0.02)

pheatmap(SNP_e0_corr_matrix, breaks = breaksList, 
main = "", legend_labels = c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),
color = colorRampPalette(rev(met.brewer("Benedictus", direction = 1)))(length(breaksList)),
legend = TRUE, cutree_rows = 3, cutree_cols = 3, angle_col = 45, border_color = "white")

Now for lifespan equality

Show the code
pheatmap(SNP_h_corr_matrix, breaks = breaksList, 
main = "", legend_labels = c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),
color = colorRampPalette(rev(met.brewer("Benedictus", direction = 1)))(length(breaksList)),
legend = TRUE, cutree_rows = 3, cutree_cols = 3, angle_col = 45, border_color = "white")

Calculate meta-analytic test statistics

The purpose of this meta-analysis is to search for SNPs that have some effect on ageing, expressed in many different contexts (sexes, temperatures and mating status’).

To conduct CPASSOC for a given SNP, we need a summary statistic from each GWAS. A different number of lines were included in each GWAS, which can cause small differences in the number of SNPs assessed. We therefore prune the list of SNPs to those included in all univariate analyses. After pruning, 1,603,573 SNPs remain.

The Bonferroni adjusted significance threshold for this number of tests is \(p_{adj} = \frac{0.05}{1603573} = 3.12*10^{-8}\); here and for all future analysis, we use p \(< 10^{-8}\) as our significance threshold, as this is slightly more conservative and easier to quickly interpret.

Life expectancy

Show the code
Arya_f_l_T <- Arya_f_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_f = T)
  
Huang_f_18_l_T <- Huang_f_18_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_18 = T)

Huang_f_25_l_T <- Huang_f_25_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_25 = T)

Huang_f_28_l_T <- Huang_f_28_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_28 = T)

Wilson_f_l_1_T <- Wilson_f_l_1_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Wilson_f_25_1 = T)

Wilson_f_l_2_T <- Wilson_f_l_2_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Wilson_f_25_2 = T)

Durham_f_l_T <- Durham_f_l_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Durham_f_25 = T)

Patel_f_l_T <- Patel_f_l_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Patel_f_23 = T)

Arya_m_l_T <- Arya_m_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_m = T)

Huang_m_18_l_T <- Huang_m_18_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_18  = T)

Huang_m_25_l_T <- Huang_m_25_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_25 = T)

Huang_m_28_l_T <- Huang_m_28_l_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_28  = T)

all_e0_effect_sizes <-
  Arya_f_l_T %>%
  inner_join(Huang_f_18_l_T, by = "SNP") %>%
  inner_join(Huang_f_25_l_T, by = "SNP") %>%
  inner_join(Huang_f_28_l_T, by = "SNP") %>% 
  inner_join(Wilson_f_l_1_T, by = "SNP") %>% 
  inner_join(Wilson_f_l_2_T, by = "SNP") %>% 
  inner_join(Durham_f_l_T, by = "SNP") %>% 
  inner_join(Patel_f_l_T, by = "SNP") %>% 
  inner_join(Arya_m_l_T, by = "SNP") %>% 
  inner_join(Huang_m_18_l_T, by = "SNP") %>% 
  inner_join(Huang_m_25_l_T, by = "SNP") %>%
  inner_join(Huang_m_28_l_T, by = "SNP")

all_e0_effect_sizes_values <-
  all_e0_effect_sizes %>% 
  dplyr::select(2:13)

Sample_size_all <- c(165, 183, 186, 177, 161, 161, 176, 193, 165, 183, 186, 177) 

if(!file.exists("data/Derived/GWAS_results/all_e0_meta_results.csv")) {

# run the homogeneous effect meta-analysis

S_hom <- SHom(all_e0_effect_sizes_values, Sample_size_all, SNP_e0_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary (potentially in sign) for different traits e.g. if a SNP has a sex or enviornment opposite effect on lifespan)

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_all, SNP_e0_corr_matrix);

S_het <- SHet(all_e0_effect_sizes_values, Sample_size_all, SNP_e0_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)

# bind meta statistics with the univariate effect sizes

all_e0_meta_results <- 
  all_e0_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het) 

write_csv(all_e0_meta_results, "data/Derived/GWAS_results/all_e0_meta_results.csv")

} else all_e0_meta_results <- read_csv("data/Derived/GWAS_results/all_e0_meta_results.csv")

Lifespan equality

Show the code
Arya_f_h_T <- Arya_f_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_f = T)
  
Huang_f_18_h_T <- Huang_f_18_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_18 = T)

Huang_f_25_h_T <- Huang_f_25_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_25 = T)

Huang_f_28_h_T <- Huang_f_28_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_28 = T)

Wilson_f_h_1_T <- Wilson_f_h_1_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Wilson_f_25_1 = T)

Wilson_f_h_2_T <- Wilson_f_h_2_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Wilson_f_25_2 = T)

Durham_f_h_T <- Durham_f_h_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Durham_f_25 = T)

Patel_f_h_T <- Patel_f_h_GWAS %>% 
  dplyr::select(SNP, T) %>%  
  rename(Patel_f_23 = T)

Arya_m_h_T <- Arya_m_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_m = T)

Huang_m_18_h_T <- Huang_m_18_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_18  = T)

Huang_m_25_h_T <- Huang_m_25_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_25 = T)

Huang_m_28_h_T <- Huang_m_28_h_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_28  = T)


all_h_effect_sizes <-
  Arya_f_h_T %>%
  inner_join(Huang_f_18_h_T, by = "SNP") %>%
  inner_join(Huang_f_25_h_T, by = "SNP") %>%
  inner_join(Huang_f_28_h_T, by = "SNP") %>% 
  inner_join(Wilson_f_h_1_T, by = "SNP") %>%
  inner_join(Wilson_f_h_2_T, by = "SNP") %>% 
  inner_join(Durham_f_h_T, by = "SNP") %>% 
  inner_join(Patel_f_h_T, by = "SNP") %>% 
  inner_join(Arya_m_h_T, by = "SNP") %>% 
  inner_join(Huang_m_18_h_T, by = "SNP") %>% 
  inner_join(Huang_m_25_h_T, by = "SNP") %>%
  inner_join(Huang_m_28_h_T, by = "SNP") 
  

all_h_effect_sizes_values <-
  all_h_effect_sizes %>% 
  dplyr::select(2:13)

if(!file.exists("data/Derived/GWAS_results/all_h_meta_results.csv")) {

S_hom <- SHom(all_h_effect_sizes_values, Sample_size_all, SNP_h_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary (potentially in sign) for different traits e.g. if a SNP has a sex or enviornment opposite effect on lifespan)

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_all, SNP_h_corr_matrix);

S_het <- SHet(all_h_effect_sizes_values, Sample_size_all, SNP_h_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)

# bind meta statistics with the univariate effect sizes

all_h_meta_results <- 
  all_h_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het)

write_csv(all_h_meta_results, "data/Derived/GWAS_results/all_h_meta_results.csv")
} else all_h_meta_results <- read_csv("data/Derived/GWAS_results/all_h_meta_results.csv")

Visualise the results

We combine GWAS summary statistics calculated from lifespan data measured across different contexts. It’s likely that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the S_het calculated p-values.

First lets show the effect of CPASSOC on the number of variants found to be associated with life expectancy and lifespan equality.

Table SX…th effect of CPASSOC on the number of variants detected with associations that exceed the significance threshold.

Show the code
tibble(Analysis = c("CPASSOC", "Univariate", "CPASSOC", "Univariate"),
       Trait = c("Life expectancy", "Life expectancy", "Lifespan equality", "Lifespan equality"),
       `p < 1e-05 variants` = c(sum(all_e0_meta_results$meta_p_het < 1e-05),
                                   p_05_SNPs,
                                   sum(all_h_meta_results$meta_p_het < 1e-05),
                                   p_05_SNPs_h),
       `p < 1e-08 variants` = c(sum(all_e0_meta_results$meta_p_het < 1e-08),
                                   sum(e0_table$`p < 1e-08 variants`),
                                   sum(all_h_meta_results$meta_p_het < 1e-08),
                                   sum(h_table$`p < 1e-08 variants`)))  %>% 
  kable() %>% 
  kable_styling()
Analysis Trait p < 1e-05 variants p < 1e-08 variants
CPASSOC Life expectancy 401 87
Univariate Life expectancy 387 0
CPASSOC Lifespan equality 125 16
Univariate Lifespan equality 257 0

Table SX the variants that have significant associations with life expectancy.

Show the code
# join gene annotations with the list of analysed variants 
# note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.

life_expectancy_variants <-
  all_e0_meta_results %>%
  filter(meta_p_het < 1e-08) %>% 
  dplyr::select(SNP, S_het, meta_p_het) %>%
  left_join(annotations %>% filter(distance.to.gene <= 500)) %>% 
  mutate(meta_p_het = round(meta_p_het*10^18, 3)/10^18,
         S_het = round(S_het, 3)) %>% 
  dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)

life_expectancy_variants %>% 
  my_data_table()

Table SX the variants that have significant associations with lifespan equality.

Show the code
# join gene annotations with the list of analysed variants 
# note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.

lifespan_equality_variants <-
  all_h_meta_results %>%
  filter(meta_p_het < 1e-08) %>% 
  dplyr::select(SNP, S_het, meta_p_het) %>%
  left_join(annotations %>% filter(distance.to.gene <= 500)) %>% 
  mutate(meta_p_het = round(meta_p_het*10^11, 3)/10^11,
         S_het = round(S_het, 3)) %>% 
  dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)

lifespan_equality_variants %>% 
  my_data_table()

Now lets build some ‘Manhattan plots’ to show where these significant associations can be found:

Show the code
common_SNPs <- 
  all_e0_meta_results %>% filter(meta_p_het < 1e-08) %>% 
  inner_join(all_h_meta_results %>% filter(meta_p_het < 1e-08), by = "SNP") %>% 
  mutate(common_SNP = "YES") %>% 
  dplyr::select(SNP, common_SNP)

e0_results <- 
  all_e0_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het) %>% 
  left_join(common_SNPs) %>% 
  mutate(common_SNP = if_else(is.na(common_SNP), "NO", common_SNP))
  

h_results <- 
  all_h_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het) %>% 
  left_join(common_SNPs) %>% 
  mutate(common_SNP = if_else(is.na(common_SNP), "NO", common_SNP))

# plot the results using the manhattan plot custom function we defined earlier

e0_het_plot <- build_manhattan_plot(e0_results) +
  labs(title = "Life expectancy") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 22))

h_het_plot <- build_manhattan_plot(h_results) +
  labs(title = "Lifespan equality") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 22))


e0_het_plot + h_het_plot

Plot the univariate effect sizes for each of the SNPs associated with life expectancy / lifespan equality at the genome-wide significance threshold (p < \(0.05^{-8}\)) after CPASSOC.

Life expectancy

Show the code
SNP_heatmap_e0 <-
  all_e0_meta_results %>% filter(meta_p_het < 1e-08) %>% 
  arrange(meta_p_het) %>% dplyr::select(1:13) 

row_name <- SNP_heatmap_e0$SNP
SNP_heatmap_e0 <- SNP_heatmap_e0 %>% dplyr::select(-SNP) %>% as.matrix()
rownames(SNP_heatmap_e0) <- row_name

breaksList <- seq(-5, 5, by = 0.1)

annotation_SNPs <- 
  all_e0_meta_results %>% filter(meta_p_het < 1e-08) %>% dplyr::select(SNP) %>% 
  mutate(Chromosome = case_when(str_detect(SNP, "2L") ~ "2L",
                                str_detect(SNP, "2R") ~ "2R",
                                str_detect(SNP, "3L") ~ "3L",
                                str_detect(SNP, "3R") ~ "3R",
                                str_detect(SNP, "X") ~ "X"))

annotation_studies <- 
  tibble(Study = c("Arya_f",
                   "Huang_f_18",
                   "Huang_f_25",
                   "Huang_f_28",
                   "Wilson_f_25_1",
                   "Wilson_f_25_2",
                   "Durham_f_25",
                   "Patel_f_23",
                   "Arya_m",
                   "Huang_m_18",
                   "Huang_m_25",
                   "Huang_m_28"),
         Temperature = c("25",
                         "18",
                         "25",
                         "28",
                         "25",
                         "25",
                         "25",
                         "23",
                         "25",
                         "18",
                         "25",
                         "28")) %>% 
  mutate(Sex = case_when(str_detect(Study, "_f") ~ "Female",
                         .default = "Male"),
         Mating = case_when(str_detect(Study, "Arya") ~ "NO",
                             str_detect(Study, "Huang") ~ "Throughout life",
                             str_detect(Study, "Wilson") ~ "Early life",
                             str_detect(Study, "Durham") ~ "Throughout life",
                             str_detect(Study, "Patel") ~ "Early life"),
         Author = str_extract(Study, ".*(?=\\_)"),
         Author = str_remove(Author, "_f"),
         Author = str_remove(Author, "_m"))


# create a study annotation column, need this to be a data.frame rather than a tibble for some reason 

Study_details <- annotation_studies %>%
  as.data.frame() %>% 
  dplyr::select(Study, Temperature, Mating)

my_categories <- data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],
                            Mating = Study_details[, 3])

my_colors <- list(Temperature = c("18" = "#7bbcd5", # sailboat colours from pnw
                                  "23" = "#d0e2af",
                                  "25" = "#f5db99",
                                  "28" = "#e89c81"),
                  Mating = c("NO" = "#f8e3d1", # Shuksan from pnw
                             "Early life" = "#d7b1c5",
                             "Throughout life" = "#ac8eab"),
                  Chromosome = c("2L" = "#d8aedd", # lake colours from pnw
                                 "2R" = "#cb74ad",
                                 "3L" = "#11c2b5",
                                 "3R" = "#72e1e1",
                                 "X" = "#fbcc74"))
# create a SNP annotation column

SNP_details <- annotation_SNPs %>%
  as.data.frame()

my_SNP_categories <- data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])

my_col_names <- c("Arya et al females", "Huang et al females", "Huang et al females",
                  "Huang et al females", "Wilson et al females 1", "Wilson et al females 2", "Durham et al females",
                  "Patel et al females", "Arya et al males", "Huang et al males", "Huang et al males",
                  "Huang et al males")


pheatmap(SNP_heatmap_e0, breaks = breaksList, 
         main = "", legend_labels = c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),
         color = colorRampPalette(rev(met.brewer("Benedictus", direction = 1)))(length(breaksList)),
         legend = TRUE, cutree_rows = 6, cutree_cols = 5, 
         angle_col = 45, border_color = "white",
         annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,
         fontsize = 8, labels_col = my_col_names)

Lifespan equality

Show the code
SNP_heatmap_h <-
  all_h_meta_results %>% filter(meta_p_het < 1e-08) %>% 
  arrange(meta_p_het) %>% dplyr::select(1:13) 

row_name <- SNP_heatmap_h$SNP
SNP_heatmap_h <- SNP_heatmap_h %>% dplyr::select(-SNP) %>% as.matrix()
rownames(SNP_heatmap_h) = row_name

breaksList <- seq(-5, 5, by = 0.1)

annotation_SNPs_h <- 
  all_h_meta_results %>% filter(meta_p_het < 1e-08) %>% dplyr::select(SNP) %>% 
  mutate(Chromosome = case_when(str_detect(SNP, "2L") ~ "2L",
                                str_detect(SNP, "2R") ~ "2R",
                                str_detect(SNP, "3L") ~ "3L",
                                str_detect(SNP, "3R") ~ "3R",
                                str_detect(SNP, "X") ~ "X"))


# create a SNP annotation column

SNP_details_h <- annotation_SNPs_h %>%
  as.data.frame()

my_SNP_categories_h <- data.frame(row.names = SNP_details_h[, 1], Chromosome = SNP_details_h[, 2])

pheatmap(SNP_heatmap_h, breaks = breaksList, 
         main = "", legend_labels = c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),
         color = colorRampPalette(rev(met.brewer("Benedictus", direction = 1)))(length(breaksList)),
         legend = TRUE, cutree_rows = 3, cutree_cols = 4, angle_col = 45, border_color = "white",
         annotation_col = my_categories, annotation_colors = my_colors, 
         annotation_row = my_SNP_categories_h,
         fontsize = 8, labels_col = my_col_names)

Analysis of sexually antagonistic effects on ageing

The intersex genetic correlation

Lets take a quick look at the intersex-genetic correlations for our metrics in each relevant measurement condition.

Show the code
Arya_two_sexes_SNPS <-
  left_join(
    Arya_f_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Female_e0_effect = BETA), 
    
    Arya_m_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Male_e0_effect = BETA)
  ) %>% 
  left_join(Arya_f_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Female_h_effect = BETA)) %>% 
  left_join(Arya_m_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Male_h_effect = BETA))

Huang_18_two_sexes_SNPS <-
  left_join(
    Huang_f_18_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Female_e0_effect = BETA), 
    
    Huang_m_18_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Male_e0_effect = BETA)
  ) %>% 
  left_join(Huang_f_18_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Female_h_effect = BETA)) %>% 
  left_join(Huang_m_18_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Male_h_effect = BETA))


Huang_25_two_sexes_SNPS <-
  left_join(
    Huang_f_25_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Female_e0_effect = BETA), 
    
    Huang_m_25_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Male_e0_effect = BETA)
  ) %>% 
  left_join(Huang_f_25_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Female_h_effect = BETA)) %>% 
  left_join(Huang_m_25_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Male_h_effect = BETA))  


Huang_28_two_sexes_SNPS <-
  left_join(
    Huang_f_28_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Female_e0_effect = BETA), 
    
    Huang_m_28_l_GWAS %>% 
      dplyr::select(1:2) %>% 
      rename(Male_e0_effect = BETA)
  ) %>% 
  left_join(Huang_f_28_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Female_h_effect = BETA)) %>% 
  left_join(Huang_m_28_h_GWAS %>% 
              dplyr::select(1:2) %>% 
              rename(Male_h_effect = BETA))

Plot the within-treatment intersex-genetic correlations

Show the code
pal <- pnw_palette("Shuksan2", 100)

Arya_e0_sex_plot <-
  Arya_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female life expectancy", 
       y = "Effect on male\nlife expectancy",
       title = "Arya et al 2010: 25C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

Arya_h_sex_plot <-
  Arya_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female lifespan equality", 
       y = "Effect on male\nlifespan equality",
       title = "Arya et al 2010: 25C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

Huang_18_e0_sex_plot <-
  Huang_18_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female life expectancy", 
       y = "Effect on male\nlife expectancy",
       title = "Huang et al, 2020: 18C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

Huang_18_h_sex_plot <-
  Huang_18_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female lifespan equality", 
       y = "Effect on male\nlifespan equality",
       title = "Huang et al, 2020: 18C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

Huang_25_e0_sex_plot <-
  Huang_25_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female life expectancy", 
       y = "Effect on male\nlife expectancy",
       title = "Huang et al, 2020: 25C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

Huang_25_h_sex_plot <-
  Huang_25_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female lifespan equality", 
       y = "Effect on male\nlifespan equality",
       title = "Huang et al, 2020: 25C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

Huang_28_e0_sex_plot <-
  Huang_28_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female life expectancy", 
       y = "Effect on male\nlife expectancy",
       title = "Huang et al, 2020: 28C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

Huang_28_h_sex_plot <-
  Huang_28_two_sexes_SNPS %>% 
  ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  stat_binhex(bins = 50) +
  scale_fill_gradientn(colours = pal) +
  #geom_point() +
  #stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +
  coord_cartesian(xlim = c(-0.8, 0.8), ylim = c(-0.8, 0.8)) +
  labs(x = "Effect on female lifespan equality", 
       y = "Effect on male\nlifespan equality",
       title = "Huang et al, 2020: 28C") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 11))

(Arya_e0_sex_plot + Arya_h_sex_plot) /
  (Huang_18_e0_sex_plot + Huang_18_h_sex_plot) / 
  (Huang_25_e0_sex_plot + Huang_25_h_sex_plot) / 
  (Huang_28_e0_sex_plot + Huang_28_h_sex_plot) 

Axis of antagonism and concordance

While there is a strong inter-sex genetic correlation for both life expectancy and lifespan equality, both correlations are significantly lower than one. Thus, it is likely that there are alleles that have sex-limited or sex-opposite effects on lifespan/lifespan equality. In an attempt to identify these genetic variants, we select those studies that measured lifespan in both sexes and calculate a sexual antagonism index for each line, following previous studies focused on similar questions (e.g. Berger et al, 2014, Grieshop and Arnqvist, 2018, Ruzicka et al, 2020). To create the index, we rotate the coordinate system of the female and male fitness plane by 45 degrees, by multiplying the 204 x 2 fitness matrix by the rotation matrix

\[\mathbf{R} = \begin{bmatrix} \frac{-1}{\sqrt{2}} & \frac{-1}{\sqrt{2}} \\[0.3em] \frac{-1}{\sqrt{2}} & \frac{1}{\sqrt{2}} \\[0.3em] \end{bmatrix}\]

Show the code
rotation_matrix <- 
  matrix(c(-1/sqrt(2), -1/sqrt(2), -1/sqrt(2), 1/sqrt(2)),
         nrow = 2)

rotate_axis_function <- function(data_1, data_2, Females, Males, metric){
  left_join(
    data_1 %>% rename(Females = metric),
    data_2 %>% rename(Males = metric)) %>% 
    dplyr::select(-line) %>% 
    as.matrix() %*% rotation_matrix %>% 
    as_tibble() %>% 
    rename(SC_axis  = V1,
           SA_axis = V2) %>% 
    bind_cols(left_join(
      data_1 %>% rename(Females = metric),
      data_2%>% rename(Males = metric))) %>% 
    dplyr::select(line, Females, Males, everything())
}
  
Arya_l_SA <-
  rotate_axis_function(Arya_f_l, Arya_m_l, 
                     Females = "Female_life_expectancy", 
                     Males = "Male_life_expectancy",
                     metric = "e0_scaled") 

Huang_18_l_SA <-
  rotate_axis_function(Huang_f_18_l, Huang_m_18_l, 
                     Females = "Female_life_expectancy", 
                     Males = "Male_life_expectancy",
                     metric = "e0_scaled") 

Huang_25_l_SA <-
  rotate_axis_function(Huang_f_25_l, Huang_m_25_l, 
                     Females = "Female_life_expectancy", 
                     Males = "Male_life_expectancy",
                     metric = "e0_scaled") 

Huang_28_l_SA <-
  rotate_axis_function(Huang_f_28_l, Huang_m_28_l, 
                     Females = "Female_life_expectancy", 
                     Males = "Male_life_expectancy",
                     metric = "e0_scaled") 

# now lifespan equality

Arya_h_SA <-
  rotate_axis_function(Arya_f_h, Arya_m_h, 
                     Females = "Female_lifespan_equality", 
                     Males = "Male_lifespan_equality",
                     metric = "h") 

Huang_18_h_SA <-
  rotate_axis_function(Huang_f_18_h, Huang_m_18_h, 
                     Females = "Female_lifespan_equality", 
                     Males = "Male_lifespan_equality",
                     metric = "h") 

Huang_25_h_SA <-
  rotate_axis_function(Huang_f_25_h, Huang_m_25_h, 
                     Females = "Female_lifespan_equality", 
                     Males = "Male_lifespan_equality",
                     metric = "h") 

Huang_28_h_SA <-
  rotate_axis_function(Huang_f_28_h, Huang_m_28_h, 
                     Females = "Female_lifespan_equality", 
                     Males = "Male_lifespan_equality",
                     metric = "h") 

To visualise this transformation lets plot the standardised life expectancy means presented in Arya et al 2010

Show the code
Arya_l_SA %>% 
  ggplot(aes(x = Females, y = Males)) +
  geom_vline(xintercept = 0, linetype = 2, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  geom_abline(intercept = 0, slope = -1, linewidth = 1, linetype = 2, colour = "salmon") +
  geom_abline(intercept = 0, slope = 1, linewidth = 1, linetype = 2, colour = "salmon") +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 1) +
  geom_point(aes(fill = SA_axis), shape = 21, size = 4) +
  scale_fill_moma_c("Avedon", direction = -1, limits = c(-2.5, 2.5)) +
  coord_cartesian(xlim = c(-4, 4), ylim = c(-4, 4)) +
  labs(fill = "SA index",
       x = "Life expectancy (female)",
       y = "Life expectancy (male)",
       title = "Arya et al 2010 data") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

Fig SX. Residual line means for female and male life expectancy. Points indicate a single genetic line. The sexual antagonism index ranges from male-beneficial and female-detrimental (orange points) to female-beneficial and male-detrimental (green points). The red dotted lines show the rotation that has been performed to create the antagonism and concordance axis.

Run univariate GWAS

Show the code
SA_e0_Arya <- prep_for_SA_GWAS(Arya_l_SA)
SA_e0_Huang_18 <- prep_for_SA_GWAS(Huang_18_l_SA)
SA_e0_Huang_25 <- prep_for_SA_GWAS(Huang_25_l_SA)
SA_e0_Huang_28 <- prep_for_SA_GWAS(Huang_28_l_SA)

SA_h_Arya <- prep_for_SA_GWAS(Arya_h_SA)
SA_h_Huang_18 <- prep_for_SA_GWAS(Huang_18_h_SA)
SA_h_Huang_25 <- prep_for_SA_GWAS(Huang_25_h_SA)
SA_h_Huang_28 <- prep_for_SA_GWAS(Huang_28_h_SA)

SC_e0_Arya <- prep_for_SC_GWAS(Arya_l_SA)
SC_e0_Huang_18 <- prep_for_SC_GWAS(Huang_18_l_SA)
SC_e0_Huang_25 <- prep_for_SC_GWAS(Huang_25_l_SA)
SC_e0_Huang_28 <- prep_for_SC_GWAS(Huang_28_l_SA)

SC_h_Arya <- prep_for_SC_GWAS(Arya_h_SA)
SC_h_Huang_18 <- prep_for_SC_GWAS(Huang_18_h_SA)
SC_h_Huang_25 <- prep_for_SC_GWAS(Huang_25_h_SA)
SC_h_Huang_28 <- prep_for_SC_GWAS(Huang_28_h_SA)

if(!file.exists("data/Derived/GWAS_results/SA_e0_Arya.tsv.gz")) {
run_GWAS(SA_e0_Arya)
run_GWAS(SA_e0_Huang_18)
run_GWAS(SA_e0_Huang_25)
run_GWAS(SA_e0_Huang_28)
run_GWAS(SA_h_Arya)
run_GWAS(SA_h_Huang_18)
run_GWAS(SA_h_Huang_25)
run_GWAS(SA_h_Huang_28)

run_GWAS(SC_e0_Arya)
run_GWAS(SC_e0_Huang_18)
run_GWAS(SC_e0_Huang_25)
run_GWAS(SC_e0_Huang_28)
run_GWAS(SC_h_Arya)
run_GWAS(SC_h_Huang_18)
run_GWAS(SC_h_Huang_25)
run_GWAS(SC_h_Huang_28)
}

SA_e0_Arya_GWAS <- read_tsv("data/Derived/GWAS_results/SA_e0_Arya.tsv.gz") 
SA_e0_Huang_18_GWAS <- read_tsv("data/Derived/GWAS_results/SA_e0_Huang_18.tsv.gz") 
SA_e0_Huang_25_GWAS <- read_tsv("data/Derived/GWAS_results/SA_e0_Huang_25.tsv.gz") 
SA_e0_Huang_28_GWAS <- read_tsv("data/Derived/GWAS_results/SA_e0_Huang_28.tsv.gz") 

SA_h_Arya_GWAS <- read_tsv("data/Derived/GWAS_results/SA_h_Arya.tsv.gz") 
SA_h_Huang_18_GWAS <- read_tsv("data/Derived/GWAS_results/SA_h_Huang_18.tsv.gz")
SA_h_Huang_25_GWAS <- read_tsv("data/Derived/GWAS_results/SA_h_Huang_25.tsv.gz")
SA_h_Huang_28_GWAS <- read_tsv("data/Derived/GWAS_results/SA_h_Huang_28.tsv.gz")

SC_e0_Arya_GWAS <- read_tsv("data/Derived/GWAS_results/SC_e0_Arya.tsv.gz") 
SC_e0_Huang_18_GWAS <- read_tsv("data/Derived/GWAS_results/SC_e0_Huang_18.tsv.gz") 
SC_e0_Huang_25_GWAS <- read_tsv("data/Derived/GWAS_results/SC_e0_Huang_25.tsv.gz") 
SC_e0_Huang_28_GWAS <- read_tsv("data/Derived/GWAS_results/SC_e0_Huang_28.tsv.gz") 

SC_h_Arya_GWAS <- read_tsv("data/Derived/GWAS_results/SC_h_Arya.tsv.gz") 
SC_h_Huang_18_GWAS <- read_tsv("data/Derived/GWAS_results/SC_h_Huang_18.tsv.gz")
SC_h_Huang_25_GWAS <- read_tsv("data/Derived/GWAS_results/SC_h_Huang_25.tsv.gz")
SC_h_Huang_28_GWAS <- read_tsv("data/Derived/GWAS_results/SC_h_Huang_28.tsv.gz")

How many variants do we find that have notable associations with the SA axis for life expectancy:

Table SX. Genotype to phenotype associations detected by univariate GWAS, for the life expectancy sexual antagonism axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.

Show the code
# filter down to sig associations

SA_e0_table <-
bind_rows(
SA_e0_Arya_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_e0_Arya_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
  
SA_e0_Huang_18_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_e0_Huang_18_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SA_e0_Huang_25_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_e0_Huang_25_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SA_e0_Huang_28_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_e0_Huang_28_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) 

# how many unique variants have been detected?

SA_p_05_SNPs_e0 <-
  bind_rows(
    
    SA_e0_Arya_GWAS %>% 
      filter(P < 1e-05),
    
    SA_e0_Huang_18_GWAS %>% 
      filter(P < 1e-05),
  
    SA_e0_Huang_25_GWAS %>% 
      filter(P < 1e-05),
    
    SA_e0_Huang_28_GWAS %>% 
      filter(P < 1e-05)) %>% 
  distinct(SNP) %>% 
  nrow()


SA_e0_table %>% 
  add_row(Study = "Totals",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = SA_p_05_SNPs_e0,
          `p < 1e-08 variants` = sum(SA_e0_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 25 Virgin 13 0
Huang et al 2020 18 Mated 18 0
Huang et al 2020 25 Mated 305 1
Huang et al 2020 28 Mated 15 0
Totals 351 1

Table SX. Genotype to phenotype associations detected by univariate GWAS, for the life expectancy sexual concordance axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.

Show the code
# filter down to sig associations

SC_e0_table <-
bind_rows(
SC_e0_Arya_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_e0_Arya_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
  
SC_e0_Huang_18_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_e0_Huang_18_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SC_e0_Huang_25_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_e0_Huang_25_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SC_e0_Huang_28_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_e0_Huang_28_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) 

# how many unique variants have been detected?

SC_p_05_SNPs_e0 <-
  bind_rows(
    
    SC_e0_Arya_GWAS %>% 
      filter(P < 1e-05),
    
    SC_e0_Huang_18_GWAS %>% 
      filter(P < 1e-05),
  
    SC_e0_Huang_25_GWAS %>% 
      filter(P < 1e-05),
    
    SC_e0_Huang_28_GWAS %>% 
      filter(P < 1e-05)) %>% 
  distinct(SNP) %>% 
  nrow()


SC_e0_table %>% 
  add_row(Study = "Totals",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = SC_p_05_SNPs_e0,
          `p < 1e-08 variants` = sum(SC_e0_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 25 Virgin 18 0
Huang et al 2020 18 Mated 24 0
Huang et al 2020 25 Mated 47 0
Huang et al 2020 28 Mated 31 0
Totals 119 0

Table SX. Genotype to phenotype associations detected by univariate GWAS, for the lifespan equality sexual antagonism axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.

Show the code
# filter down to sig associations

SA_h_table <-
bind_rows(
SA_h_Arya_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_h_Arya_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
  
SA_h_Huang_18_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_h_Huang_18_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SA_h_Huang_25_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_h_Huang_25_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SA_h_Huang_28_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SA_h_Huang_28_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) 

# how many unique variants have been detected?

SA_p_05_SNPs_h <-
  bind_rows(
    
    SA_h_Arya_GWAS %>% 
      filter(P < 1e-05),
    
    SA_h_Huang_18_GWAS %>% 
      filter(P < 1e-05),
  
    SA_h_Huang_25_GWAS %>% 
      filter(P < 1e-05),
    
    SA_h_Huang_28_GWAS %>% 
      filter(P < 1e-05)) %>% 
  distinct(SNP) %>% 
  nrow()


SA_h_table %>% 
  add_row(Study = "Totals",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = SA_p_05_SNPs_h,
          `p < 1e-08 variants` = sum(SA_h_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 25 Virgin 1 0
Huang et al 2020 18 Mated 14 0
Huang et al 2020 25 Mated 20 0
Huang et al 2020 28 Mated 12 0
Totals 47 0

Table SX. Genotype to phenotype associations detected by univariate GWAS, for the lifespan equality sexual concordance axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.

Show the code
# filter down to sig associations

SC_h_table <-
bind_rows(
SC_h_Arya_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_h_Arya_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
  
SC_h_Huang_18_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_h_Huang_18_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SC_h_Huang_25_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_h_Huang_25_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

SC_h_Huang_28_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(SC_h_Huang_28_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) 

# how many unique variants have been detected?

SC_p_05_SNPs_h <-
  bind_rows(
    
    SC_h_Arya_GWAS %>% 
      filter(P < 1e-05),
    
    SC_h_Huang_18_GWAS %>% 
      filter(P < 1e-05),
  
    SC_h_Huang_25_GWAS %>% 
      filter(P < 1e-05),
    
    SC_h_Huang_28_GWAS %>% 
      filter(P < 1e-05)) %>% 
  distinct(SNP) %>% 
  nrow()


SC_h_table %>% 
  add_row(Study = "Totals",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = SC_p_05_SNPs_h,
          `p < 1e-08 variants` = sum(SC_h_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 25 Virgin 22 0
Huang et al 2020 18 Mated 2 0
Huang et al 2020 25 Mated 37 0
Huang et al 2020 28 Mated 52 0
Totals 100 0

Run CPASSOC

Generate the genetic correlation matrix

We calculate the genetic correlations between traits using both the line mean and SNP effect size comparisons. We use the SNP correlations following Zhu et al (2015) and show these in the below plots. Note that while the two approaches produce results that have a strong positive correlation, there are small differences that will affect downstream analyses.

Build the SA correlations matrices

Show the code
# use the BETA coefficients to build the SNP correlation matrix

SNP_SA_data_e0 <-
  bind_rows(
    SA_e0_Arya_GWAS %>% 
      mutate(Study = "Arya", Temperature = 25),
    SA_e0_Huang_18_GWAS %>% 
      mutate(Study= "Huang", Temperature= 18),
    SA_e0_Huang_25_GWAS %>% 
      mutate(Study= "Huang", Temperature= 25),
    SA_e0_Huang_28_GWAS %>% 
      mutate(Study= "Huang", Temperature= 28)) %>% 
  dplyr::select(SNP, BETA, Study, Temperature) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Temperature)) %>% 
  dplyr::select(-SNP) 

SNP_SA_data_h <-
  bind_rows(
    SA_h_Arya_GWAS %>% 
      mutate(Study = "Arya", Temperature = 25),
    SA_h_Huang_18_GWAS %>% 
      mutate(Study= "Huang", Temperature= 18),
    SA_h_Huang_25_GWAS %>% 
      mutate(Study= "Huang", Temperature= 25),
    SA_h_Huang_28_GWAS %>% 
      mutate(Study= "Huang", Temperature= 28)) %>% 
  dplyr::select(SNP, BETA, Study, Temperature) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Temperature)) %>% 
  dplyr::select(-SNP) 
  

SNP_SA_e0_corr_matrix <- cor(SNP_SA_data_e0, use = "pairwise.complete.obs", method = "spearman")
SNP_SA_h_corr_matrix <- cor(SNP_SA_data_h, use = "pairwise.complete.obs", method = "spearman")

SA_e0_line_data <-
  bind_rows(Arya_l_SA %>% mutate(Study = "Arya_25"),
            Huang_18_l_SA %>% mutate(Study = "Huang_18"),
            Huang_25_l_SA %>% mutate(Study = "Huang_25"),
            Huang_28_l_SA %>% mutate(Study = "Huang_28")) %>% 
  dplyr::select(line, Study, SA_axis) %>% 
  pivot_wider(values_from = SA_axis, names_from = Study) %>% 
  dplyr::select(-line)

SA_h_line_data <-
  bind_rows(Arya_h_SA %>% mutate(Study = "Arya_25"),
            Huang_18_h_SA %>% mutate(Study = "Huang_18"),
            Huang_25_h_SA %>% mutate(Study = "Huang_25"),
            Huang_28_h_SA %>% mutate(Study = "Huang_28")) %>% 
  dplyr::select(line, Study, SA_axis) %>% 
  pivot_wider(values_from = SA_axis, names_from = Study) %>% 
  dplyr::select(-line)

SA_line_e0_corr_matrix <- cor(SA_e0_line_data, use = "pairwise.complete.obs", method = "spearman")
SA_line_h_corr_matrix <- cor(SA_h_line_data, use = "pairwise.complete.obs", method = "spearman")

Build the SC correlations matrices

Show the code
# use the BETA coefficients to build the SNP correlation matrix

SNP_SC_data_e0 <-
  bind_rows(
    SC_e0_Arya_GWAS %>% 
      mutate(Study = "Arya", Temperature = 25),
    SC_e0_Huang_18_GWAS %>% 
      mutate(Study= "Huang", Temperature= 18),
    SC_e0_Huang_25_GWAS %>% 
      mutate(Study= "Huang", Temperature= 25),
    SC_e0_Huang_28_GWAS %>% 
      mutate(Study= "Huang", Temperature= 28)) %>% 
  dplyr::select(SNP, BETA, Study, Temperature) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Temperature)) %>% 
  dplyr::select(-SNP) 

SNP_SC_data_h <-
  bind_rows(
    SC_h_Arya_GWAS %>% 
      mutate(Study = "Arya", Temperature = 25),
    SC_h_Huang_18_GWAS %>% 
      mutate(Study= "Huang", Temperature= 18),
    SC_h_Huang_25_GWAS %>% 
      mutate(Study= "Huang", Temperature= 25),
    SC_h_Huang_28_GWAS %>% 
      mutate(Study= "Huang", Temperature= 28)) %>% 
  dplyr::select(SNP, BETA, Study, Temperature) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Temperature)) %>% 
  dplyr::select(-SNP) 
  

SNP_SC_e0_corr_matrix <- cor(SNP_SC_data_e0, use = "pairwise.complete.obs", method = "spearman")
SNP_SC_h_corr_matrix <- cor(SNP_SC_data_h, use = "pairwise.complete.obs", method = "spearman")

SC_e0_line_data <-
  bind_rows(Arya_l_SA %>% mutate(Study = "Arya_25"),
            Huang_18_l_SA %>% mutate(Study = "Huang_18"),
            Huang_25_l_SA %>% mutate(Study = "Huang_25"),
            Huang_28_l_SA %>% mutate(Study = "Huang_28")) %>% 
  dplyr::select(line, Study, SC_axis) %>% 
  pivot_wider(values_from = SC_axis, names_from = Study) %>% 
  dplyr::select(-line)

SC_h_line_data <-
  bind_rows(Arya_h_SA %>% mutate(Study = "Arya_25"),
            Huang_18_h_SA %>% mutate(Study = "Huang_18"),
            Huang_25_h_SA %>% mutate(Study = "Huang_25"),
            Huang_28_h_SA %>% mutate(Study = "Huang_28")) %>% 
  dplyr::select(line, Study, SC_axis) %>% 
  pivot_wider(values_from = SC_axis, names_from = Study) %>% 
  dplyr::select(-line)

SC_line_e0_corr_matrix <- cor(SC_e0_line_data, use = "pairwise.complete.obs", method = "spearman")
SC_line_h_corr_matrix <- cor(SC_h_line_data, use = "pairwise.complete.obs", method = "spearman")

Calculate meta-analytic test statistics

The purpose of these meta-analysis is to search for SNPs that have a) sex-specific and b) sexually concordant effects on ageing.

To conduct CPASSOC for a given SNP, we need a summary statistic from each GWAS. A different number of lines were included in each GWAS, which can cause small differences in the number of SNPs assessed. We therefore prune the list of SNPs to those included in all univariate analyses. After pruning, 1,619,501 SNPs remain.

Sex-antagonism and concordance for life expectancy

First run CPASSOC for the SA axis

Show the code
SA_Arya_l_T <- SA_e0_Arya_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya = T)
  
SA_Huang_18_l_T <- SA_e0_Huang_18_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_18 = T)

SA_Huang_25_l_T <- SA_e0_Huang_25_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_25 = T)

SA_Huang_28_l_T <- SA_e0_Huang_28_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_28 = T)

SA_e0_effect_sizes <-
  SA_Arya_l_T %>%
  inner_join(SA_Huang_18_l_T, by = "SNP") %>%
  inner_join(SA_Huang_25_l_T, by = "SNP") %>%
  inner_join(SA_Huang_28_l_T, by = "SNP") 


SA_e0_effect_size_values <-
  SA_e0_effect_sizes %>% 
  dplyr::select(2:5)

Sample_size_SA <- c(165, 183, 186, 177) 

if(!file.exists("data/Derived/GWAS_results/SA_e0_meta_results.csv")) {

# run the homogeneous effect meta-analysis

S_hom <- SHom(SA_e0_effect_size_values, Sample_size_SA, SNP_SA_e0_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_SA, SNP_SA_e0_corr_matrix);

S_het <- SHet(SA_e0_effect_size_values, Sample_size_SA, SNP_SA_e0_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)


SA_e0_meta_results <- 
  SA_e0_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het) # add the unadjusted p values

write_csv(SA_e0_meta_results, "data/Derived/GWAS_results/SA_e0_meta_results.csv")

} else SA_e0_meta_results <- read_csv("data/Derived/GWAS_results/SA_e0_meta_results.csv")

Now run it for the SC axis

Show the code
SC_Arya_l_T <- SC_e0_Arya_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya = T)
  
SC_Huang_18_l_T <- SC_e0_Huang_18_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_18 = T)

SC_Huang_25_l_T <- SC_e0_Huang_25_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_25 = T)

SC_Huang_28_l_T <- SC_e0_Huang_28_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_28 = T)

SC_e0_effect_sizes <-
  SC_Arya_l_T %>%
  inner_join(SC_Huang_18_l_T, by = "SNP") %>%
  inner_join(SC_Huang_25_l_T, by = "SNP") %>%
  inner_join(SC_Huang_28_l_T, by = "SNP") 

SC_e0_effect_size_values <-
  SC_e0_effect_sizes %>% 
  dplyr::select(2:5)

Sample_size_SC <- c(165, 183, 186, 177) 

if(!file.exists("data/Derived/GWAS_results/SC_e0_meta_results.csv")) {

# run the homogeneous effect meta-analysis

S_hom <- SHom(SC_e0_effect_size_values, Sample_size_SC, SNP_SC_e0_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_SC, SNP_SC_e0_corr_matrix);

S_het <- SHet(SC_e0_effect_size_values, Sample_size_SC, SNP_SC_e0_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)


SC_e0_meta_results <- 
  SC_e0_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het) # add the unadjusted p values

write_csv(SC_e0_meta_results, "data/Derived/GWAS_results/SC_e0_meta_results.csv")

} else SC_e0_meta_results <- read_csv("data/Derived/GWAS_results/SC_e0_meta_results.csv")

Sex-antagonism and concordance for lifespan equality

Sexual antagonism

Show the code
# lifespan equality

SA_Arya_h_T <- SA_h_Arya_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya = T)
  
SA_Huang_18_h_T <- SA_h_Huang_18_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_18 = T)

SA_Huang_25_h_T <- SA_h_Huang_25_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_25 = T)

SA_Huang_28_h_T <- SA_h_Huang_28_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_28 = T)

SA_h_effect_sizes <-
  SA_Arya_h_T %>%
  inner_join(SA_Huang_18_h_T, by = "SNP") %>%
  inner_join(SA_Huang_25_h_T, by = "SNP") %>%
  inner_join(SA_Huang_28_h_T, by = "SNP") 


SA_h_effect_size_values <-
  SA_h_effect_sizes %>% 
  dplyr::select(2:5)

if(!file.exists("data/Derived/GWAS_results/SA_h_meta_results.csv")) {

S_hom <- SHom(SA_h_effect_size_values, Sample_size_SA, SNP_SA_h_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (probably not such a problem here given all traits we investigate are some version of life expectancy)

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_SA, SNP_SA_h_corr_matrix);

S_het <- SHet(SA_h_effect_size_values, Sample_size_SA, SNP_SA_h_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)


SA_h_meta_results <- 
  SA_h_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het) # add the unadjusted p values

write_csv(SA_h_meta_results, "data/Derived/GWAS_results/SA_h_meta_results.csv")
} else SA_h_meta_results <- read_csv("data/Derived/GWAS_results/SA_h_meta_results.csv")

Sexual concordance

Show the code
# lifespan equality

SC_Arya_h_T <- SC_h_Arya_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya = T)
  
SC_Huang_18_h_T <- SC_h_Huang_18_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_18 = T)

SC_Huang_25_h_T <- SC_h_Huang_25_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_25 = T)

SC_Huang_28_h_T <- SC_h_Huang_28_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_28 = T)

SC_h_effect_sizes <-
  SC_Arya_h_T %>%
  inner_join(SC_Huang_18_h_T, by = "SNP") %>%
  inner_join(SC_Huang_25_h_T, by = "SNP") %>%
  inner_join(SC_Huang_28_h_T, by = "SNP") 


SC_h_effect_size_values <-
  SC_h_effect_sizes %>% 
  dplyr::select(2:5)

if(!file.exists("data/Derived/GWAS_results/SC_h_meta_results.csv")) {

S_hom <- SHom(SC_h_effect_size_values, Sample_size_SC, SNP_SC_h_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (probably not such a problem here given all traits we investigate are some version of life expectancy)

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_SC, SNP_SC_h_corr_matrix);

S_het <- SHet(SC_h_effect_size_values, Sample_size_SC, SNP_SC_h_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)


SC_h_meta_results <- 
  SC_h_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het) # add the unadjusted p values

write_csv(SC_h_meta_results, "data/Derived/GWAS_results/SC_h_meta_results.csv")
} else SC_h_meta_results <- read_csv("data/Derived/GWAS_results/SC_h_meta_results.csv")

Visualise the results

We combine GWAS summary statistics calculated from lifespan data measured across different contexts. It’s possible that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the S_het calculated p-values.

First lets show the effect of CPASSOC on the number of variants found to be associated with life expectancy and lifespan equality.

Table XX. Genotype to phenotype associations detected by univariate GWAS, for lifespan equality. The total row shows the number of unique candidate variants identified across all studies.

Show the code
tibble(Analysis = c("CPASSOC", "Univariate", "CPASSOC", "Univariate", "CPASSOC", "Univariate", "CPASSOC", "Univariate"),
       Trait = c("Life expectancy SA", "Life expectancy SA", "Life expectancy SC", "Life expectancy SC",
                 "Lifespan equality SA", "Lifespan equality SA", "Lifespan equality SC", "Lifespan equality SC"),
       `p < 1e-05 variants` = c(sum(SA_e0_meta_results$meta_p_het < 1e-05),
                                  SA_p_05_SNPs_e0,
                                sum(SC_e0_meta_results$meta_p_het < 1e-05),
                                  SC_p_05_SNPs_e0,
                                  sum(SA_h_meta_results$meta_p_het < 1e-05),
                                  SA_p_05_SNPs_h,
                                 sum(SC_h_meta_results$meta_p_het < 1e-05),
                                  SC_p_05_SNPs_h),
       `p < 1e-08 variants` = c(sum(SA_e0_meta_results$meta_p_het < 1e-08),
                                  sum(SA_e0_table$`p < 1e-08 variants`),
                                sum(SC_e0_meta_results$meta_p_het < 1e-08),
                                  sum(SC_e0_table$`p < 1e-08 variants`),
                                  sum(SA_h_meta_results$meta_p_het < 1e-08),
                                  sum(SA_h_table$`p < 1e-08 variants`),
                                 sum(SC_h_meta_results$meta_p_het < 1e-08),
                                  sum(SC_h_table$`p < 1e-08 variants`)))  %>% 
  kable() %>% 
  kable_styling()
Analysis Trait p < 1e-05 variants p < 1e-08 variants
CPASSOC Life expectancy SA 74 1
Univariate Life expectancy SA 351 1
CPASSOC Life expectancy SC 412 62
Univariate Life expectancy SC 119 0
CPASSOC Lifespan equality SA 38 0
Univariate Lifespan equality SA 47 0
CPASSOC Lifespan equality SC 188 32
Univariate Lifespan equality SC 100 0
Show the code
#SA_e0_meta_results %>% filter(meta_p_hom < 1e-05) %>% 
 # anti_join(SA_p_05_SNPs_e0) %>% arrange(meta_p_hom)
#SA_p_05_SNPs_e0

Table SX…variants that have sex-specific associations with life expectancy.

Show the code
# join gene annotations with the list of analysed variants 
# note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.

SA_e0_variants <-
  SA_e0_meta_results  %>%
  filter(meta_p_het < 1e-08) %>% 
  dplyr::select(SNP, S_het, meta_p_het) %>%
  left_join(annotations %>% filter(distance.to.gene <= 500)) %>% 
  mutate(meta_p_het = round(meta_p_het*10^11, 2)/10^11,
         S_het = round(S_het, 3)) %>% 
  dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)

SA_e0_variants %>% 
  my_data_table()

Table SX…variants that have sexually concordant associations with life expectancy.

Show the code
# join gene annotations with the list of analysed variants 
# note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.

SC_e0_variants <-
  SC_e0_meta_results  %>%
  filter(meta_p_het < 1e-08) %>% 
  dplyr::select(SNP, S_het, meta_p_het) %>%
  left_join(annotations %>% filter(distance.to.gene <= 500)) %>% 
  mutate(meta_p_het = round(meta_p_het*10^18, 2)/10^18,
         S_het = round(S_het, 3)) %>% 
  dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)

SC_e0_variants %>% 
  my_data_table()

Table SX…variants that have sexually concordant associations with life expectancy. There are no variants with strong sex-specific associations with lifespan equality.

Show the code
# join gene annotations with the list of analysed variants 
# note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.

SC_h_variants <-
  SC_h_meta_results  %>%
  filter(meta_p_het < 1e-08) %>% 
  dplyr::select(SNP, S_het, meta_p_het) %>%
  left_join(annotations %>% filter(distance.to.gene <= 500)) %>% 
  mutate(meta_p_het = round(meta_p_het*10^8, 4)/10^8,
         S_het = round(S_het, 3)) %>% 
  dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)

SC_h_variants %>% 
  my_data_table()

Now lets build some ‘Manhattan plots’ to show where these significant associations can be found:

Show the code
SA_e0_results <- 
  SA_e0_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het) 

SC_e0_results <- 
  SC_e0_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het)
  
SA_h_results <- 
  SA_h_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het)

SC_h_results <- 
  SC_h_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het)

# plot the results using the Manhattan plot custom function we defined earlier

SA_e0_het_plot <- 
  build_manhattan_plot(SA_e0_results) +
  labs(title = "SA axis for life expectancy") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 15)) + 
  xlab(NULL)

SC_e0_het_plot <- 
  build_manhattan_plot(SC_e0_results) +
  labs(title = "SC axis for life expectancy") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 15))

SA_h_het_plot <-
  build_manhattan_plot(SA_h_results) +
  labs(title = "SA axis for lifespan equality") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 15)) + 
  xlab(NULL)

SC_h_het_plot <-
  build_manhattan_plot(SC_h_results) +
  labs(title = "SC axis for lifespan equality") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 15))

SA_e0_het_plot + SA_h_het_plot + SC_e0_het_plot + SC_h_het_plot + 
  plot_layout(ncol = 2)

Figure SX. xxx

Investigating the rate of ageing and the scaling of mortality risk

Creating axes of ageing rate and scaling

We’ve shown that perpendicular deviation from the regression of lifespan equality on life expectancy closely corresponds to the rate of ageing (\(b\)) parameter in a gompertz mortality model. To identify regions of the genome associated with the rate of ageing, we can calculate a rate of ageing index for each line in each treatment, by once again using rotation matrices. To create this index, we rotate the coordinate system of the life expectancy and lifespan equality plane by \(\theta\) degrees, where \(\theta\) is the angle between the positive x-axis and the regression of lifespan equality on life expectancy (mean centered and standarised to \(\sigma\) = 1).

Finding the slopes

Show the code
Arya_f_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Arya_2010_f, chains = 4, cores = 4, file = "data/Derived/Ageing_axis_slopes/Arya_f_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Arya_f_slope <-
  as_draws_df(Arya_f_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Arya_m_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Arya_2010_m, chains = 4, cores = 4, file = "data/Derived/Ageing_axis_slopes/Arya_m_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Arya_m_slope <-
  as_draws_df(Arya_m_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Huang_f_18_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Huang_2020_f_18, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Huang_f_18_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Huang_f_18_slope <-
  as_draws_df(Huang_f_18_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Huang_m_18_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Huang_2020_m_18, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Huang_m_18_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Huang_m_18_slope <-
  as_draws_df(Huang_m_18_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Huang_f_25_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Huang_2020_f_25, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Huang_f_25_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Huang_f_25_slope <-
  as_draws_df(Huang_f_25_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Huang_m_25_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Huang_2020_m_25, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Huang_m_25_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Huang_m_25_slope <-
  as_draws_df(Huang_m_25_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Huang_f_28_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Huang_2020_f_28, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Huang_f_28_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Huang_f_28_slope <-
  as_draws_df(Huang_f_28_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Huang_m_28_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Huang_2020_m_28, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Huang_m_28_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Huang_m_28_slope <-
  as_draws_df(Huang_m_28_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Wilson_f_model_1 <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Wilson_2020_f_1, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Wilson_f_slope_1",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Wilson_f_slope_1 <-
  as_draws_df(Wilson_f_model_1) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Wilson_f_model_2 <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Wilson_2020_f_2, chains = 4, cores = 4, 
            file = "data/Derived/Ageing_axis_slopes/Wilson_f_slope_2",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Wilson_f_slope_2 <-
  as_draws_df(Wilson_f_model_2) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

Durham_f_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Durham_2014_f_adjusted, chains = 4, cores = 4, file = "data/Derived/Ageing_axis_slopes/Durham_f_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Durham_f_slope <-
  as_draws_df(Durham_f_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)


Patel_f_model <- brm(h_scaled ~ 1 + e0_scaled,
            prior = c(prior(normal(0, 0.1), class = Intercept),
                      prior(normal(0, 1), class = b),
                      prior(exponential(1), class = sigma)),
            family = gaussian,
            iter = 6000, warmup = 2000,
            control = list(adapt_delta = 0.8, max_treedepth = 10),
            data = Patel_2021_f, chains = 4, cores = 4, file = "data/Derived/Ageing_axis_slopes/Patel_f_slope",
            #backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),
            refresh = 400, silent = 0, seed = 1)

Patel_f_slope <-
  as_draws_df(Patel_f_model) %>% 
  as_tibble() %>% 
  dplyr::select(b_e0_scaled) %>% 
  summarise(slope = mean(b_e0_scaled)) %>% pull(slope)

With regression coefficients found, we can use the following formula to calculate the angle (in radians) between the mean regression line and the x-axis:

\(\theta = tan^{-1}(\beta)\)

where \(\beta\) is the point estimate of the slope from each model posterior distribution.

Show the code
Arya_f_angle <- atan(Arya_f_slope)
Arya_m_angle <- atan(Arya_m_slope)
Huang_f_18_angle <- atan(Huang_f_18_slope)
Huang_m_18_angle <- atan(Huang_m_18_slope)
Huang_f_25_angle <- atan(Huang_f_25_slope)
Huang_m_25_angle <- atan(Huang_m_25_slope)
Huang_f_28_angle <- atan(Huang_f_28_slope)
Huang_m_28_angle <- atan(Huang_m_28_slope)
Wilson_f_1_angle <- atan(Wilson_f_slope_1)
Wilson_f_2_angle <- atan(Wilson_f_slope_2)
Durham_f_angle <- atan(Durham_f_slope)
Patel_f_angle <- atan(Patel_f_slope)

We then rotate the coordinate system of the life expectancy and lifespan equality plane clockwise by \(\theta\):

\[x' = xcos(\theta) - ysin(\theta),\] \[y' = xsin(\theta) + ycos(\theta),\] where \(x'\) and \(y'\) are the vectors of ageing rate and scaling, and \(x\) and \(y\) are life expectancy and lifespan equality.

Show the code
Arya_2010_f <-
  Arya_2010_f %>% 
  mutate(ageing_axis = h_scaled*cos(Arya_f_angle) - e0_scaled*sin(Arya_f_angle),
         scaling_axis = h_scaled*sin(Arya_f_angle) + e0_scaled*cos(Arya_f_angle))

Arya_2010_m <-
  Arya_2010_m %>% 
  mutate(ageing_axis = h_scaled*cos(Arya_m_angle) - e0_scaled*sin(Arya_m_angle),
         scaling_axis = h_scaled*sin(Arya_m_angle) + e0_scaled*cos(Arya_m_angle))

Huang_2020_f_18 <-
  Huang_2020_f_18 %>% 
  mutate(ageing_axis = h_scaled*cos(Huang_f_18_angle) - e0_scaled*sin(Huang_f_18_angle),
         scaling_axis = h_scaled*sin(Huang_f_18_angle) + e0_scaled*cos(Huang_f_18_angle))

Huang_2020_m_18 <-
  Huang_2020_m_18 %>% 
  mutate(ageing_axis = h_scaled*cos(Huang_m_18_angle) - e0_scaled*sin(Huang_m_18_angle),
         scaling_axis = h_scaled*sin(Huang_m_18_angle) + e0_scaled*cos(Huang_m_18_angle))

Huang_2020_f_25 <-
  Huang_2020_f_25 %>% 
  mutate(ageing_axis = h_scaled*cos(Huang_f_25_angle) - e0_scaled*sin(Huang_f_25_angle),
         scaling_axis = h_scaled*sin(Huang_f_25_angle) + e0_scaled*cos(Huang_f_25_angle))

Huang_2020_m_25 <-
  Huang_2020_m_25 %>% 
  mutate(ageing_axis = h_scaled*cos(Huang_m_25_angle) - e0_scaled*sin(Huang_m_25_angle),
         scaling_axis = h_scaled*sin(Huang_m_25_angle) + e0_scaled*cos(Huang_m_25_angle))

Huang_2020_f_28 <-
  Huang_2020_f_28 %>% 
  mutate(ageing_axis = h_scaled*cos(Huang_f_28_angle) - e0_scaled*sin(Huang_f_28_angle),
         scaling_axis = h_scaled*sin(Huang_f_28_angle) + e0_scaled*cos(Huang_f_28_angle))

Huang_2020_m_28 <-
  Huang_2020_m_28 %>% 
  mutate(ageing_axis = h_scaled*cos(Huang_m_28_angle) - e0_scaled*sin(Huang_m_28_angle),
         scaling_axis = h_scaled*sin(Huang_m_28_angle) + e0_scaled*cos(Huang_m_28_angle))

Wilson_2020_f_1 <-
  Wilson_2020_f_1 %>% 
  mutate(ageing_axis = h_scaled*cos(Wilson_f_1_angle) - e0_scaled*sin(Wilson_f_1_angle),
         scaling_axis = h_scaled*sin(Wilson_f_1_angle) + e0_scaled*cos(Wilson_f_1_angle))

Wilson_2020_f_2 <-
  Wilson_2020_f_2 %>% 
  mutate(ageing_axis = h_scaled*cos(Wilson_f_2_angle) - e0_scaled*sin(Wilson_f_2_angle),
         scaling_axis = h_scaled*sin(Wilson_f_2_angle) + e0_scaled*cos(Wilson_f_2_angle))

Durham_2014_f_adjusted <-
  Durham_2014_f_adjusted %>% 
  mutate(ageing_axis = h_scaled*cos(Durham_f_angle) - e0_scaled*sin(Durham_f_angle),
         scaling_axis = h_scaled*sin(Durham_f_angle) + e0_scaled*cos(Durham_f_angle))

Patel_2021_f <- 
  Patel_2021_f %>% 
  mutate(ageing_axis = h_scaled*cos(Patel_f_angle) - e0_scaled*sin(Patel_f_angle),
         scaling_axis = h_scaled*sin(Patel_f_angle) + e0_scaled*cos(Patel_f_angle))

Plot the line means, coloured by their value on the ageing rate axis.

Show the code
ageing_axis_plot <- function(data, study_title){
data %>% 
  ggplot(aes(x = e0, y = h)) +
  geom_point(aes(fill = ageing_axis), shape = 21, size = 4) +
  scale_fill_moma_c("Avedon", direction = -1, limits = c(-4, 4)) +
  geom_smooth(method = "lm") +
  #coord_cartesian(xlim = c(15, 120), ylim = c(0, 3.2)) +
  labs(fill = "Ageing rate index",
       x = "e0",
       y = "h",
       title = study_title) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
}

a <- ageing_axis_plot(Arya_2010_f, "Arya 2010 females")
b <- ageing_axis_plot(Arya_2010_m, "Arya 2010 males")
c <- ageing_axis_plot(Huang_2020_f_18, "Huang 18C females")
d <- ageing_axis_plot(Huang_2020_m_18, "Huang 18C males")
e <- ageing_axis_plot(Huang_2020_f_25, "Huang 25C females")
f <- ageing_axis_plot(Huang_2020_m_25, "Huang 25C males")
g <- ageing_axis_plot(Huang_2020_f_28, "Huang 28C females")
h <- ageing_axis_plot(Huang_2020_m_28, "Huang 28C males")
i <- ageing_axis_plot(Wilson_2020_f_1, "Wilson 25C females 1")
j <- ageing_axis_plot(Wilson_2020_f_2, "Wilson 25C females 2")
k <- ageing_axis_plot(Durham_2014_f_adjusted, "Durham 25C females")
l <- ageing_axis_plot(Patel_2021_f, "Patel 23C females")

(a | b | c) / (d | e | f) / (g | h| i) / (j | k | l) + #guide_area()) +
  plot_layout(guides = 'collect')

Run univariate GWAS

Show the code
Arya_f_ageing <- prep_for_ageing_GWAS(Arya_2010_f)
Arya_m_ageing <- prep_for_ageing_GWAS(Arya_2010_m)
Huang_f_18_ageing <- prep_for_ageing_GWAS(Huang_2020_f_18)
Huang_m_18_ageing <- prep_for_ageing_GWAS(Huang_2020_m_18)
Huang_f_25_ageing <- prep_for_ageing_GWAS(Huang_2020_f_25)
Huang_m_25_ageing <- prep_for_ageing_GWAS(Huang_2020_m_25)
Huang_f_28_ageing <- prep_for_ageing_GWAS(Huang_2020_f_28)
Huang_m_28_ageing <- prep_for_ageing_GWAS(Huang_2020_m_28)
Wilson_f_ageing_1 <- prep_for_ageing_GWAS(Wilson_2020_f_1)
Wilson_f_ageing_2 <- prep_for_ageing_GWAS(Wilson_2020_f_2)
Durham_f_ageing <- prep_for_ageing_GWAS(Durham_2014_f_adjusted)
Patel_f_ageing <- prep_for_ageing_GWAS(Patel_2021_f)

Arya_f_scaling <- prep_for_scaling_GWAS(Arya_2010_f)
Arya_m_scaling <- prep_for_scaling_GWAS(Arya_2010_m)
Huang_f_18_scaling <- prep_for_scaling_GWAS(Huang_2020_f_18)
Huang_m_18_scaling <- prep_for_scaling_GWAS(Huang_2020_m_18)
Huang_f_25_scaling <- prep_for_scaling_GWAS(Huang_2020_f_25)
Huang_m_25_scaling <- prep_for_scaling_GWAS(Huang_2020_m_25)
Huang_f_28_scaling <- prep_for_scaling_GWAS(Huang_2020_f_28)
Huang_m_28_scaling <- prep_for_scaling_GWAS(Huang_2020_m_28)
Wilson_f_scaling_1 <- prep_for_scaling_GWAS(Wilson_2020_f_1)
Wilson_f_scaling_2 <- prep_for_scaling_GWAS(Wilson_2020_f_2)
Durham_f_scaling <- prep_for_scaling_GWAS(Durham_2014_f_adjusted)
Patel_f_scaling <- prep_for_scaling_GWAS(Patel_2021_f)

if(!file.exists("data/Derived/GWAS_results/Arya_f_ageing.tsv.gz")) {
run_GWAS(Arya_f_ageing)
run_GWAS(Arya_m_ageing)
run_GWAS(Huang_f_18_ageing)
run_GWAS(Huang_m_18_ageing)
run_GWAS(Huang_f_25_ageing)
run_GWAS(Huang_m_25_ageing)
run_GWAS(Huang_f_28_ageing)
run_GWAS(Huang_m_28_ageing)
run_GWAS(Wilson_f_ageing_1)
run_GWAS(Wilson_f_ageing_2)
run_GWAS(Durham_f_ageing)
run_GWAS(Patel_f_ageing)

run_GWAS(Arya_f_scaling)
run_GWAS(Arya_m_scaling)
run_GWAS(Huang_f_18_scaling)
run_GWAS(Huang_m_18_scaling)
run_GWAS(Huang_f_25_scaling)
run_GWAS(Huang_m_25_scaling)
run_GWAS(Huang_f_28_scaling)
run_GWAS(Huang_m_28_scaling)
run_GWAS(Wilson_f_scaling_1)
run_GWAS(Wilson_f_scaling_2)
run_GWAS(Durham_f_scaling)
run_GWAS(Patel_f_scaling)
}

Arya_f_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_f_ageing.tsv.gz") 
Arya_m_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_m_ageing.tsv.gz") 
Huang_f_18_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_18_ageing.tsv.gz")
Huang_m_18_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_18_ageing.tsv.gz")
Huang_f_25_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_25_ageing.tsv.gz")
Huang_m_25_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_25_ageing.tsv.gz")
Huang_f_28_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_28_ageing.tsv.gz")
Huang_m_28_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_28_ageing.tsv.gz")
Wilson_f_ageing_1_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_ageing_1.tsv.gz")
Wilson_f_ageing_2_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_ageing_2.tsv.gz")
Durham_f_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Durham_f_ageing.tsv.gz")
Patel_f_ageing_GWAS <- read_tsv("data/Derived/GWAS_results/Patel_f_ageing.tsv.gz")

Arya_f_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_f_scaling.tsv.gz") 
Arya_m_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Arya_m_scaling.tsv.gz") 
Huang_f_18_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_18_scaling.tsv.gz")
Huang_m_18_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_18_scaling.tsv.gz")
Huang_f_25_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_25_scaling.tsv.gz")
Huang_m_25_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_25_scaling.tsv.gz")
Huang_f_28_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_f_28_scaling.tsv.gz")
Huang_m_28_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Huang_m_28_scaling.tsv.gz")
Wilson_f_scaling_1_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_scaling_1.tsv.gz")
Wilson_f_scaling_2_GWAS <- read_tsv("data/Derived/GWAS_results/Wilson_f_scaling_2.tsv.gz")
Durham_f_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Durham_f_scaling.tsv.gz")
Patel_f_scaling_GWAS <- read_tsv("data/Derived/GWAS_results/Patel_f_scaling.tsv.gz")

How many variants do we find that have notable associations with ageing rate?

Show the code
# filter down to sig associations

ageing_axis_table <-
bind_rows(
Arya_f_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Arya_f_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Arya_m_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Arya_m_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Sex = "Male",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
  
Huang_f_18_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_f_18_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Female",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_m_18_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_m_18_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Male",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_f_25_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_f_25_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_m_25_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_m_25_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Male",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_f_28_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_f_28_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Female",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_m_28_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_m_28_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Male",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Wilson_f_ageing_1_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Wilson_f_ageing_1_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Wilson et al 2020",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Wilson_f_ageing_2_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Wilson_f_ageing_2_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Wilson et al 2020",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Durham_f_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Durham_f_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Durham et al 2014",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Patel_f_ageing_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Patel_f_ageing_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Patel et al 2021",
         Sex = "Female",
         Temperature = "23",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)
) 

# how many unique variants have been detected?

ageing_axis_p_05_SNPs <-
  bind_rows(
    
    Arya_f_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Arya_m_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_18_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_18_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_25_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_25_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_28_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_28_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Wilson_f_ageing_1_GWAS %>% 
      filter(P < 1e-05),
    
    Wilson_f_ageing_2_GWAS %>% 
      filter(P < 1e-05),
    
    Durham_f_ageing_GWAS %>% 
      filter(P < 1e-05),
    
    Patel_f_ageing_GWAS %>% 
      filter(P < 1e-05)) %>% 
  distinct(SNP) %>% 
  nrow()


ageing_axis_table %>% 
  add_row(Study = "Totals",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = ageing_axis_p_05_SNPs,
          `p < 1e-08 variants` = sum(ageing_axis_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Sex Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 Female 25 Virgin 18 0
Arya et al 2010 Male 25 Virgin 5 0
Huang et al 2020 Female 18 Mated 18 1
Huang et al 2020 Male 18 Mated 6 0
Huang et al 2020 Female 25 Mated 34 0
Huang et al 2020 Male 25 Mated 50 0
Huang et al 2020 Female 28 Mated 55 0
Huang et al 2020 Male 28 Mated 79 0
Wilson et al 2020 Female 25 Mated 35 0
Wilson et al 2020 Female 25 Mated 31 0
Durham et al 2014 Female 25 Mated 15 0
Patel et al 2021 Female 23 Mated 13 0
Totals NA 337 1

What about scaling?

Show the code
# filter down to sig associations

scaling_axis_table <-
bind_rows(
Arya_f_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Arya_f_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Arya_m_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Arya_m_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Arya et al 2010",
         Sex = "Male",
         Temperature = "25",
         `Mating status` = "Virgin") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),
  
Huang_f_18_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_f_18_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Female",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_m_18_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_m_18_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Male",
         Temperature = "18",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_f_25_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_f_25_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_m_25_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_m_25_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Male",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_f_28_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_f_28_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Female",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Huang_m_28_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Huang_m_28_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Huang et al 2020",
         Sex = "Male",
         Temperature = "28",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Wilson_f_scaling_1_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Wilson_f_scaling_1_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Wilson et al 2020",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Wilson_f_scaling_2_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Wilson_f_scaling_2_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Wilson et al 2020",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Durham_f_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Durham_f_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Durham et al 2014",
         Sex = "Female",
         Temperature = "25",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),

Patel_f_scaling_GWAS %>% 
  filter(P < 1e-05) %>% 
  transmute(`p < 1e-05 variants` = n(),
         `p < 1e-08 variants` = nrow(filter(Patel_f_scaling_GWAS, P < 1e-08))) %>%
  distinct() %>% 
  mutate(Study = "Patel et al 2021",
         Sex = "Female",
         Temperature = "23",
         `Mating status` = "Mated") %>% 
  dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)
) 

# how many unique variants have been detected?

scaling_axis_p_05_SNPs <-
  bind_rows(
  
    Arya_f_scaling_GWAS %>% 
      filter(P < 1e-05),
    
    Arya_m_scaling_GWAS %>% 
      filter(P < 1e-05),
  
    Huang_f_18_scaling_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_18_scaling_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_f_25_scaling_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_25_scaling_GWAS %>% 
      filter(P < 1e-05),
  
    Huang_f_28_scaling_GWAS %>% 
      filter(P < 1e-05),
    
    Huang_m_28_scaling_GWAS %>% 
      filter(P < 1e-05),
    
     Wilson_f_scaling_1_GWAS %>% 
      filter(P < 1e-05),
    
    Wilson_f_scaling_2_GWAS %>% 
      filter(P < 1e-05),
    
    Durham_f_scaling_GWAS %>% 
      filter(P < 1e-05),
  
    Patel_f_scaling_GWAS %>% 
      filter(P < 1e-05)) %>% 
  distinct(SNP) %>% 
  nrow()


scaling_axis_table %>% 
  add_row(Study = "Totals",
          Temperature = "",
          `Mating status` = "",
          `p < 1e-05 variants` = scaling_axis_p_05_SNPs,
          `p < 1e-08 variants` = sum(scaling_axis_table$`p < 1e-08 variants`)) %>% 
  kable() %>% 
  kable_styling()
Study Sex Temperature Mating status p < 1e-05 variants p < 1e-08 variants
Arya et al 2010 Female 25 Virgin 30 0
Arya et al 2010 Male 25 Virgin 14 0
Huang et al 2020 Female 18 Mated 13 0
Huang et al 2020 Male 18 Mated 17 0
Huang et al 2020 Female 25 Mated 15 0
Huang et al 2020 Male 25 Mated 31 0
Huang et al 2020 Female 28 Mated 34 0
Huang et al 2020 Male 28 Mated 16 0
Wilson et al 2020 Female 25 Mated 19 0
Wilson et al 2020 Female 25 Mated 16 0
Durham et al 2014 Female 25 Mated 33 0
Patel et al 2021 Female 23 Mated 139 0
Totals NA 373 0

Run CPASSOC

Generate the genetic correlation matrix

Using SNP effect sizes, we calculate the genetic correlations between a) rates of ageing and b) scaling of mortality, measured in different environments.

Show the code
# use the BETA coefficients to build the SNP correlation matrix for the rate of ageing

SNP_ageing_axis_data <-
  bind_rows(
    Arya_f_ageing_GWAS %>% 
      mutate(Study = "Arya_2010", Temperature = 25, Sex = "Female"),
    
    Arya_m_ageing_GWAS %>% 
      mutate(Study = "Arya_2010", Temperature = 25, Sex = "Male"),
    
    Huang_f_18_ageing_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 18, Sex = "Female"),
    
    Huang_m_18_ageing_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 18, Sex = "Male"),
    
    Huang_f_25_ageing_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 25, Sex = "Female"),
    
    Huang_m_25_ageing_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 25, Sex = "Male"),
    
    Huang_f_28_ageing_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 28, Sex = "Female"),
    
    Huang_m_28_ageing_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 28, Sex = "Male"),
    
    Wilson_f_ageing_1_GWAS %>% 
      mutate(Study = "Wilson_2020_1", Temperature = 25, Sex = "Female"),
    
    Wilson_f_ageing_2_GWAS %>% 
      mutate(Study = "Wilson_2020_2", Temperature = 25, Sex = "Female"),
    
    Durham_f_ageing_GWAS %>% 
      mutate(Study = "Durham_2014", Temperature = 25, Sex = "Female"),
    
    Patel_f_ageing_GWAS %>% 
      mutate(Study = "Patel_2021", Temperature = 23, Sex = "Female")) %>% 
  dplyr::select(SNP, BETA, Study, Temperature, Sex) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Temperature, Sex)) %>% 
  dplyr::select(-SNP) 


SNP_ageing_axis_corr_matrix <- cor(SNP_ageing_axis_data, use = "pairwise.complete.obs", method = "spearman")

# use the BETA coefficients to build the SNP correlation matrix for scaling

SNP_scaling_axis_data <-
 bind_rows(
    Arya_f_scaling_GWAS %>% 
      mutate(Study = "Arya_2010", Temperature = 25, Sex = "Female"),
    
    Arya_m_scaling_GWAS %>% 
      mutate(Study = "Arya_2010", Temperature = 25, Sex = "Male"),
  
    Huang_f_18_scaling_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 18, Sex = "Female"),
    
    Huang_m_18_scaling_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 18, Sex = "Male"),
    
    Huang_f_25_scaling_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 25, Sex = "Female"),
    
    Huang_m_25_scaling_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 25, Sex = "Male"),
  
    Huang_f_28_scaling_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 28, Sex = "Female"),
    
    Huang_m_28_scaling_GWAS %>% 
      mutate(Study = "Huang_2020", Temperature = 28, Sex = "Male"),
    
     Wilson_f_scaling_1_GWAS %>% 
      mutate(Study = "Wilson_2020_1", Temperature = 25, Sex = "Female"),
    
    Wilson_f_scaling_2_GWAS %>% 
      mutate(Study = "Wilson_2020_2", Temperature = 25, Sex = "Female"),
    
    Durham_f_scaling_GWAS %>% 
      mutate(Study = "Durham", Temperature = 25, Sex = "Female"),
  
    Patel_f_scaling_GWAS %>% 
      mutate(Study = "Patel", Temperature = 23, Sex = "Female")) %>% 
  dplyr::select(SNP, BETA, Study, Temperature, Sex) %>% 
  pivot_wider(values_from = BETA, names_from = c(Study, Temperature, Sex)) %>% 
  dplyr::select(-SNP) 


SNP_scaling_axis_corr_matrix <- cor(SNP_scaling_axis_data, use = "pairwise.complete.obs", method = "spearman")

Calculate meta-analytic test statistics

The purpose of these meta-analyses is to search for SNPs that affect 1) the rate of ageing and 2) the scaling of the risk of mortality.

Run CPASSOC for the rate of ageing

Show the code
# rate of ageing

ageing_axis_Arya_f_T <- 
  Arya_f_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_f = T)
    
ageing_axis_Arya_m_T <- 
  Arya_m_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_m = T)

ageing_axis_Huang_f_18_T <- 
  Huang_f_18_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_18 = T)
  
ageing_axis_Huang_m_18_T <- 
  Huang_m_18_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_18 = T)

ageing_axis_Huang_f_25_T <- 
  Huang_f_25_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_25 = T)
  
ageing_axis_Huang_m_25_T <- 
  Huang_m_25_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_25 = T)

ageing_axis_Huang_f_28_T <- 
  Huang_f_28_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_28 = T)
  
ageing_axis_Huang_m_28_T <- 
  Huang_m_28_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_28 = T)
    
ageing_axis_Wilson_f_1_T <- 
  Wilson_f_ageing_1_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Wilson_f_1 = T)

ageing_axis_Wilson_f_2_T <- 
  Wilson_f_ageing_2_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Wilson_f_2 = T)

ageing_axis_Durham_f_T <- 
  Durham_f_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Durham_f = T)

ageing_axis_Patel_f_T <- 
  Patel_f_ageing_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Patel_f = T)
    

ageing_axis_effect_sizes <-
  ageing_axis_Arya_f_T %>%
  inner_join(ageing_axis_Arya_m_T, by = "SNP") %>%
  inner_join(ageing_axis_Huang_f_18_T, by = "SNP") %>% 
  inner_join(ageing_axis_Huang_m_18_T, by = "SNP") %>% 
  inner_join(ageing_axis_Huang_f_25_T, by = "SNP") %>% 
  inner_join(ageing_axis_Huang_m_25_T, by = "SNP") %>% 
  inner_join(ageing_axis_Huang_f_28_T, by = "SNP") %>% 
  inner_join(ageing_axis_Huang_m_28_T, by = "SNP") %>% 
  inner_join(ageing_axis_Wilson_f_1_T, by = "SNP") %>%
  inner_join(ageing_axis_Wilson_f_2_T, by = "SNP") %>%
  inner_join(ageing_axis_Durham_f_T, by = "SNP") %>%
  inner_join(ageing_axis_Patel_f_T, by = "SNP") 

ageing_axis_effect_size_values <-
  ageing_axis_effect_sizes %>% 
  dplyr::select(2:13)

Sample_size_ageing_axis <- c(165, 165, 183, 183, 186, 186, 177, 177, 161, 161, 176, 193)

if(!file.exists("data/Derived/GWAS_results/ageing_axis_meta_results.csv")) {

# run the homogeneous effect meta-analysis

S_hom <- SHom(ageing_axis_effect_size_values, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix);

S_het <- SHet(ageing_axis_effect_size_values, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)


ageing_axis_meta_results <- 
  ageing_axis_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het) # add the unadjusted p values

write_csv(ageing_axis_meta_results, "data/Derived/GWAS_results/ageing_axis_meta_results.csv")

} else ageing_axis_meta_results <- read_csv("data/Derived/GWAS_results/ageing_axis_meta_results.csv")

Run CPASSOC for the scaling of mortality risk

Show the code
# scaling

scaling_axis_Arya_f_T <- 
  Arya_f_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_f = T)
    
scaling_axis_Arya_m_T <- 
  Arya_m_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Arya_m = T)

scaling_axis_Huang_f_18_T <- 
  Huang_f_18_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_18 = T)
  
scaling_axis_Huang_m_18_T <- 
  Huang_m_18_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_18 = T)

scaling_axis_Huang_f_25_T <- 
  Huang_f_25_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_25 = T)
  
scaling_axis_Huang_m_25_T <- 
  Huang_m_25_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_25 = T)

scaling_axis_Huang_f_28_T <- 
  Huang_f_28_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_f_28 = T)
  
scaling_axis_Huang_m_28_T <- 
  Huang_m_28_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Huang_m_28 = T)
    
scaling_axis_Wilson_f_1_T <- 
  Wilson_f_scaling_1_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Wilson_f_1 = T)

scaling_axis_Wilson_f_2_T <- 
  Wilson_f_scaling_2_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Wilson_f_2 = T)

scaling_axis_Durham_f_T <- 
  Durham_f_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Durham_f = T)

scaling_axis_Patel_f_T <- 
  Patel_f_scaling_GWAS %>% 
  dplyr::select(SNP, T) %>% 
  rename(Patel_f = T)
    

scaling_axis_effect_sizes <-
  scaling_axis_Arya_f_T %>%
  inner_join(scaling_axis_Arya_m_T, by = "SNP") %>%
  inner_join(scaling_axis_Huang_f_18_T, by = "SNP") %>% 
  inner_join(scaling_axis_Huang_m_18_T, by = "SNP") %>% 
  inner_join(scaling_axis_Huang_f_25_T, by = "SNP") %>% 
  inner_join(scaling_axis_Huang_m_25_T, by = "SNP") %>% 
  inner_join(scaling_axis_Huang_f_28_T, by = "SNP") %>% 
  inner_join(scaling_axis_Huang_m_28_T, by = "SNP") %>% 
  inner_join(scaling_axis_Wilson_f_1_T, by = "SNP") %>%
  inner_join(scaling_axis_Wilson_f_2_T, by = "SNP") %>%
  inner_join(scaling_axis_Durham_f_T, by = "SNP") %>%
  inner_join(scaling_axis_Patel_f_T, by = "SNP") 


scaling_axis_effect_size_values <-
  scaling_axis_effect_sizes %>% 
  dplyr::select(2:13)

Sample_size_scaling_axis <- c(165, 165, 183, 183, 186, 186, 177, 177, 161, 161, 176, 193)

if(!file.exists("data/Derived/GWAS_results/scaling_axis_meta_results.csv")) {

# run the homogeneous effect meta-analysis

S_hom <- SHom(scaling_axis_effect_size_values, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix)

# calculate meta-p-values and bind the two together with the SNP names

p_S_hom <- pchisq(S_hom, df = 1, ncp = 0, lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_hom) %>% 
  rename(meta_p_hom = value, 
         S_hom = ...2)

# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (e.g. if a SNP has a sex or enviornment opposite effect on lifespan)

# estimate parameters of gamma distribution

para <- EstimateGamma(N = 1E4, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix);

S_het <- SHet(scaling_axis_effect_size_values, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix)

# obtain P-values of S_Het using the estimated gamma parameters
  
p_S_het <- pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>% 
  as_tibble() %>% 
  bind_cols(S_het) %>% 
  rename(meta_p_het = value, 
         S_het = ...2)


scaling_axis_meta_results <- 
  scaling_axis_effect_sizes %>% 
  bind_cols(p_S_hom,
            p_S_het) # add the unadjusted p values

write_csv(scaling_axis_meta_results, "data/Derived/GWAS_results/scaling_axis_meta_results.csv")

} else scaling_axis_meta_results <- read_csv("data/Derived/GWAS_results/scaling_axis_meta_results.csv")

Visualise the results

We combine GWAS summary statistics calculated from ageing and scaling axis data measured across different contexts. It’s possible that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the S_het calculated p-values.

First lets show the effect of CPASSOC on the number of variants found to be associated with the rate of ageing and the scaling of mortality risk.

Table XX. Genotype to phenotype associations detected by univariate GWAS, for the ageing rate and scaling of mortality axes. The total row shows the number of unique candidate variants identified across all studies.

Show the code
tibble(Analysis = c("CPASSOC", "Univariate", "CPASSOC", "Univariate"),
       Trait = c("Ageing rate", "Ageing rate", "Scaling", "Scaling"),
       `p < 1e-05 variants` = c(sum(ageing_axis_meta_results$meta_p_het < 1e-05),
                                  ageing_axis_p_05_SNPs,
                                  sum(scaling_axis_meta_results$meta_p_het < 1e-05),
                                  scaling_axis_p_05_SNPs),
       `p < 1e-08 variants` = c(sum(ageing_axis_meta_results$meta_p_het < 1e-08),
                                  sum(ageing_axis_table$`p < 1e-08 variants`),
                                  sum(scaling_axis_meta_results$meta_p_het < 1e-08),
                                  sum(scaling_axis_table$`p < 1e-08 variants`))) %>% 
  kable() %>% 
  kable_styling()
Analysis Trait p < 1e-05 variants p < 1e-08 variants
CPASSOC Ageing rate 213 28
Univariate Ageing rate 337 1
CPASSOC Scaling 312 58
Univariate Scaling 373 0

Table SX…variants that affect our proxy for the rate of ageing.

Show the code
# join gene annotations with the list of analysed variants 
# note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.

ageing_rate_genes <-
  ageing_axis_meta_results %>%
  filter(meta_p_het < 1e-08) %>% 
  dplyr::select(SNP, S_het, meta_p_het) %>%
  left_join(annotations %>% filter(distance.to.gene <= 500)) %>% 
  mutate(meta_p_het = round(meta_p_het*10^8, 5)/10^8,
         S_het = round(S_het, 3)) %>% 
  dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)

ageing_rate_genes %>% 
  my_data_table()

Table SX…variants that affect our proxy for scaling.

Show the code
scaling_genes <-
  scaling_axis_meta_results %>% 
  filter(meta_p_het < 1e-08) %>% 
  dplyr::select(SNP, S_het, meta_p_het) %>%
  left_join(annotations %>% filter(distance.to.gene <= 500)) %>% 
  mutate(meta_p_het = round(meta_p_het*10^8, 5)/10^8,
         S_het = round(S_het, 3)) %>% 
  dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)

scaling_genes %>% 
  my_data_table()

Now lets build some ‘Manhattan plots’ to show where these significant associations can be found:

Show the code
ageing_axis_results <- 
  ageing_axis_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het)

scaling_axis_results <- 
  scaling_axis_meta_results %>% 
  dplyr::select(SNP, meta_p_hom, meta_p_het) %>% 
  rename(P = meta_p_het)

# plot the results using the manhattan plot custom function we defined earlier

ageing_axis_het_plot <- 
  build_manhattan_plot(ageing_axis_results) +
  labs(title = "Ageing rate") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 12.5))

scaling_axis_het_plot <- 
  build_manhattan_plot(scaling_axis_results) +
  labs(title = "Scaling of mortality") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 12.5))

ageing_axis_het_plot + scaling_axis_het_plot 

Plot the univariate effect sizes for each of the SNPs associated with the rate of ageing at the genome-wide significance threshold (p < \(0.05^{-8}\)) after CPASSOC.

Show the code
SNP_heatmap_ageing_axis <-
  ageing_axis_meta_results %>% filter(meta_p_het < 1e-08) %>% 
  arrange(meta_p_het) %>% dplyr::select(1:13) 

row_name <- SNP_heatmap_ageing_axis$SNP
SNP_heatmap_ageing_axis <- SNP_heatmap_ageing_axis %>% dplyr::select(-SNP) %>% as.matrix()
rownames(SNP_heatmap_ageing_axis) <- row_name

breaksList <- seq(-5, 5, by = 0.1)

annotation_SNPs <- 
  ageing_axis_meta_results %>% filter(meta_p_het < 1e-08) %>% dplyr::select(SNP) %>% 
  mutate(Chromosome = case_when(str_detect(SNP, "2L") ~ "2L",
                                str_detect(SNP, "2R") ~ "2R",
                                str_detect(SNP, "3L") ~ "3L",
                                str_detect(SNP, "3R") ~ "3R",
                                str_detect(SNP, "X") ~ "X"))

annotation_studies <- 
  tibble(Study = c("Arya_f",
                   "Arya_m",
                   "Huang_f_18",
                   "Huang_m_18",
                   "Huang_f_25",
                   "Huang_m_25",
                   "Huang_f_28",
                   "Huang_m_28",
                   "Wilson_f_1",
                   "Wilson_f_2",
                   "Durham_f_25",
                   "Patel_f_23"),
         Temperature = c("25",
                         "25",
                         "18",
                         "18",
                         "25",
                         "25",
                         "28",
                         "28",
                         "25",
                         "25",
                         "25",
                         "23")) %>% 
  mutate(Sex = case_when(str_detect(Study, "_f") ~ "Female",
                         .default = "Male"),
         Mating  = case_when(str_detect(Study, "Arya") ~ "NO",
                             str_detect(Study, "Huang") ~ "Throughout life",
                             str_detect(Study, "Wilson") ~ "Early life",
                             str_detect(Study, "Durham") ~ "Throughout life",
                             str_detect(Study, "Patel") ~ "Early life"),
         Author = str_extract(Study, ".*(?=\\_)"),
         Author = str_remove(Author, "_f"),
         Author = str_remove(Author, "_m"))


# create a study annotation column, need this to be a data.frame rather than a tibble for some reason 

Study_details <- annotation_studies %>%
  as.data.frame() %>% 
  dplyr::select(Study, Temperature, Mating)

my_categories <- data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],
                            Mating = Study_details[, 3])

my_colors <- list(Temperature = c("18" = "#7bbcd5", # sailboat colours from pnw
                                  "23" = "#d0e2af",
                                  "25" = "#f5db99",
                                  "28" = "#e89c81"),
                  Mating = c("NO" = "#f8e3d1", # Shuksan from pnw
                             "Early life" = "#d7b1c5",
                             "Throughout life" = "#ac8eab"),
                  Chromosome = c("2L" = "#d8aedd", # lake colours from pnw
                                 "2R" = "#cb74ad",
                                 "3L" = "#11c2b5",
                                 "3R" = "#72e1e1",
                                 "X" = "#fbcc74"))
# create a SNP annotation column

SNP_details <- annotation_SNPs %>%
  as.data.frame()

my_SNP_categories <- data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])

my_col_names <- c("Arya et al females", "Arya et al males", "Huang et al females", 
                  "Huang et al males", "Huang et al females", "Huang et al males",
                  "Huang et al females", "Huang et al males", "Wilson et al females 1",
                  "Wilson et al females 2", "Durham et al females","Patel et al females")


pheatmap(SNP_heatmap_ageing_axis, breaks = breaksList, 
         main = "", legend_labels = c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),
         color = colorRampPalette(rev(met.brewer("Benedictus", direction = 1)))(length(breaksList)),
         legend = TRUE, cutree_rows = 4, cutree_cols = 4, 
         angle_col = 45, border_color = "white",
         annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,
         fontsize = 8, labels_col = my_col_names)

Plot the univariate effect sizes for each of the SNPs associated with the scaling of mortality risk at the genome-wide significance threshold (p < \(0.05^{-8}\)) after CPASSOC.

Show the code
SNP_heatmap_scaling_axis <-
  scaling_axis_meta_results %>% filter(meta_p_het < 1e-08) %>% 
  arrange(meta_p_het) %>% dplyr::select(1:13) 

row_name <- SNP_heatmap_scaling_axis$SNP
SNP_heatmap_scaling_axis <- SNP_heatmap_scaling_axis %>% dplyr::select(-SNP) %>% as.matrix()
rownames(SNP_heatmap_scaling_axis) <- row_name

breaksList <- seq(-5, 5, by = 0.1)

annotation_SNPs <- 
  scaling_axis_meta_results %>% filter(meta_p_het < 1e-08) %>% dplyr::select(SNP) %>% 
  mutate(Chromosome = case_when(str_detect(SNP, "2L") ~ "2L",
                                str_detect(SNP, "2R") ~ "2R",
                                str_detect(SNP, "3L") ~ "3L",
                                str_detect(SNP, "3R") ~ "3R",
                                str_detect(SNP, "X") ~ "X"))

annotation_studies <- 
  tibble(Study = c("Arya_f",
                   "Arya_m",
                   "Huang_f_18",
                   "Huang_m_18",
                   "Huang_f_25",
                   "Huang_m_25",
                   "Huang_f_28",
                   "Huang_m_28",
                   "Wilson_f_1",
                   "Wilson_f_2",
                   "Durham_f_25",
                   "Patel_f_23"),
         Temperature = c("25",
                         "25",
                         "18",
                         "18",
                         "25",
                         "25",
                         "28",
                         "28",
                         "25",
                         "25",
                         "25",
                         "23")) %>% 
  mutate(Sex = case_when(str_detect(Study, "_f") ~ "Female",
                         .default = "Male"),
         Mating  = case_when(str_detect(Study, "Arya") ~ "NO",
                             str_detect(Study, "Huang") ~ "Throughout life",
                             str_detect(Study, "Wilson") ~ "Early life",
                             str_detect(Study, "Durham") ~ "Throughout life",
                             str_detect(Study, "Patel") ~ "Early life"),
         Author = str_extract(Study, ".*(?=\\_)"),
         Author = str_remove(Author, "_f"),
         Author = str_remove(Author, "_m"))


# create a study annotation column, need this to be a data.frame rather than a tibble for some reason 

Study_details <- annotation_studies %>%
  as.data.frame() %>% 
  dplyr::select(Study, Temperature, Mating)

my_categories <- data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],
                            Mating = Study_details[, 3])

my_colors <- list(Temperature = c("18" = "#7bbcd5", # sailboat colours from pnw
                                  "23" = "#d0e2af",
                                  "25" = "#f5db99",
                                  "28" = "#e89c81"),
                  Mating = c("NO" = "#f8e3d1", # Shuksan from pnw
                             "Early life" = "#d7b1c5",
                             "Throughout life" = "#ac8eab"),
                  Chromosome = c("2L" = "#d8aedd", # lake colours from pnw
                                 "2R" = "#cb74ad",
                                 "3L" = "#11c2b5",
                                 "3R" = "#72e1e1",
                                 "X" = "#fbcc74"))
# create a SNP annotation column

SNP_details <- annotation_SNPs %>%
  as.data.frame()

my_SNP_categories <- data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])

my_col_names <- c("Arya et al females", "Arya et al males", "Huang et al females", 
                  "Huang et al males", "Huang et al females", "Huang et al males",
                  "Huang et al females", "Huang et al males", "Wilson et al females 1",
                  "Wilson et al females 2", "Durham et al females","Patel et al females")

pheatmap(SNP_heatmap_scaling_axis, breaks = breaksList, 
         main = "", legend_labels = c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),
         color = colorRampPalette(rev(met.brewer("Benedictus", direction = 1)))(length(breaksList)),
         legend = TRUE, cutree_rows = 5, cutree_cols = 4, 
         angle_col = 45, border_color = "white",
         annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,
         fontsize = 8, labels_col = my_col_names)

There are several genomic regions where we detect multiple variants with very similar to identical effect sizes, indicating linkage disequilibrium. In these cases, the variant with the true causal effect can’t be identified (assuming there is a true phenotypic effect).